From c2e8e8728eebcdfde2b5632577fceb884c983dc4 Mon Sep 17 00:00:00 2001 From: Dave Baranec Date: Fri, 23 Sep 2022 10:59:43 -0500 Subject: [PATCH 001/162] Fix an issue where using num_rows and skip_rows on a parquet file containing a mix of nested and non-nested types would result in incorrect row counts for the non-nested types. Also optimizes the preprocess path so that non-nested types do not end up getting visited by the kernel. --- cpp/src/io/parquet/page_data.cu | 62 +++++++++------------ cpp/src/io/parquet/reader_impl.cu | 59 +++++++++++--------- cpp/src/io/parquet/reader_impl.hpp | 6 +- cpp/tests/io/parquet_test.cpp | 89 +++++++++++++++++++++++++++++- 4 files changed, 148 insertions(+), 68 deletions(-) diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index 531733a7df7..bdf6e6819cd 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -1384,7 +1384,6 @@ static __device__ void gpuUpdatePageSizes(page_state_s* s, { // max nesting depth of the column int max_depth = s->col.max_nesting_depth; - // bool has_repetition = s->col.max_level[level_type::REPETITION] > 0 ? true : false; // how many input level values we've processed in the page so far int input_value_count = s->input_value_count; // how many leaf values we've processed in the page so far @@ -1479,6 +1478,11 @@ __global__ void __launch_bounds__(block_size) int t = threadIdx.x; PageInfo* pp = &pages[page_idx]; + // we only need to preprocess hierarchies with repetition in them (ie, hierarchies + // containing lists anywhere within). + bool has_repetition = chunks[pp->chunk_idx].max_level[level_type::REPETITION] > 0; + if (!has_repetition) { return; } + if (!setupLocalPageInfo(s, pp, chunks, trim_pass ? min_row : 0, trim_pass ? num_rows : INT_MAX)) { return; } @@ -1504,8 +1508,6 @@ __global__ void __launch_bounds__(block_size) } __syncthreads(); - bool has_repetition = s->col.max_level[level_type::REPETITION] > 0; - // optimization : it might be useful to have a version of gpuDecodeStream that could go wider than // 1 warp. Currently it only uses 1 warp so that it can overlap work with the value decoding step // when in the actual value decoding kernel. However, during this preprocess step we have no such @@ -1516,16 +1518,13 @@ __global__ void __launch_bounds__(block_size) while (!s->error && s->input_value_count < s->num_input_values) { // decode repetition and definition levels. these will attempt to decode at // least up to the target, but may decode a few more. - if (has_repetition) { - gpuDecodeStream(s->rep, s, target_input_count, t, level_type::REPETITION); - } + gpuDecodeStream(s->rep, s, target_input_count, t, level_type::REPETITION); gpuDecodeStream(s->def, s, target_input_count, t, level_type::DEFINITION); __syncwarp(); // we may have decoded different amounts from each stream, so only process what we've been - int actual_input_count = has_repetition ? min(s->lvl_count[level_type::REPETITION], - s->lvl_count[level_type::DEFINITION]) - : s->lvl_count[level_type::DEFINITION]; + int actual_input_count = + min(s->lvl_count[level_type::REPETITION], s->lvl_count[level_type::DEFINITION]); // process what we got back gpuUpdatePageSizes(s, actual_input_count, t, trim_pass); @@ -1840,26 +1839,14 @@ void PreprocessColumnData(hostdevice_vector& pages, return page.nesting[l_idx].size; }); - // compute column size. + // if this buffer is part of a list hierarchy, we need to determine it's + // final size and allocate it here. 
+ // // for struct columns, higher levels of the output columns are shared between input // columns. so don't compute any given level more than once. - if (out_buf.size == 0) { + if ((out_buf.user_data & PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT) && out_buf.size == 0) { int size = thrust::reduce(rmm::exec_policy(stream), size_input, size_input + pages.size()); - // Handle a specific corner case. It is possible to construct a parquet file such that - // a column within a row group contains more rows than the row group itself. This may be - // invalid, but we have seen instances of this in the wild, including how they were created - // using the apache parquet tools. Normally, the trim pass would handle this case quietly, - // but if we are not running the trim pass (which is most of the time) we need to cap the - // number of rows we will allocate/read from the file with the amount specified in the - // associated row group. This only applies to columns that are not children of lists as - // those may have an arbitrary number of rows in them. - if (!uses_custom_row_bounds && - !(out_buf.user_data & PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT) && - size > static_cast(num_rows)) { - size = static_cast(num_rows); - } - // if this is a list column add 1 for non-leaf levels for the terminating offset if (out_buf.type.id() == type_id::LIST && l_idx < max_depth) { size++; } @@ -1867,16 +1854,21 @@ void PreprocessColumnData(hostdevice_vector& pages, out_buf.create(size, stream, mr); } - // compute per-page start offset - thrust::exclusive_scan_by_key(rmm::exec_policy(stream), - page_keys.begin(), - page_keys.end(), - size_input, - start_offset_output_iterator{pages.device_ptr(), - page_index.begin(), - 0, - static_cast(src_col_schema), - static_cast(l_idx)}); + // for nested hierarchies, compute per-page start offset. + // it would be better/safer to be checking (schema.max_repetition_level > 0) here, but there's + // no easy way to get at that info here. we'd have to move this function into reader_impl.cu + if ((out_buf.user_data & PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT) || + out_buf.type.id() == type_id::LIST) { + thrust::exclusive_scan_by_key(rmm::exec_policy(stream), + page_keys.begin(), + page_keys.end(), + size_input, + start_offset_output_iterator{pages.device_ptr(), + page_index.begin(), + 0, + static_cast(src_col_schema), + static_cast(l_idx)}); + } } } diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 59bef6f5600..8be0b388f0e 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1353,26 +1353,39 @@ void reader::impl::preprocess_columns(hostdevice_vector& c hostdevice_vector& pages, size_t min_row, size_t total_rows, - bool uses_custom_row_bounds, - bool has_lists) + bool uses_custom_row_bounds) { - // TODO : we should be selectively preprocessing only columns that have - // lists in them instead of doing them all if even one contains lists. - - // if there are no lists, simply allocate every allocate every output - // column to be of size num_rows - if (!has_lists) { - std::function&)> create_columns = - [&](std::vector& cols) { - for (size_t idx = 0; idx < cols.size(); idx++) { - auto& col = cols[idx]; - col.create(total_rows, _stream, _mr); - create_columns(col.children); - } - }; - create_columns(_output_columns); - } else { - // preprocess per-nesting level sizes by page + // iterate over all input columns and allocate any associated output + // buffers if they are not part of a list hierarchy. 
mark down + // if we have any list columns that need further processing. + bool has_lists = false; + for (size_t idx = 0; idx < _input_columns.size(); idx++) { + auto const& input_col = _input_columns[idx]; + size_t max_depth = input_col.nesting_depth(); + + auto* cols = &_output_columns; + for (size_t l_idx = 0; l_idx < input_col.nesting_depth(); l_idx++) { + auto& out_buf = (*cols)[input_col.nesting[l_idx]]; + cols = &out_buf.children; + + // if this has a list parent, we will have to do further work in gpu::PreprocessColumnData + // to know how big this buffer actually is. + if (out_buf.user_data & PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT) { + has_lists = true; + } + // if we haven't already processed this column because it is part of a struct hierarchy + else if (out_buf.size == 0) { + // add 1 for the offset if this is a list column + out_buf.create( + out_buf.type.id() == type_id::LIST && l_idx < max_depth ? total_rows + 1 : total_rows, + _stream, + _mr); + } + } + } + + // if we have columns containing lists, further preprocessing is necessary. + if (has_lists) { gpu::PreprocessColumnData(pages, chunks, _input_columns, @@ -1636,9 +1649,6 @@ table_with_metadata reader::impl::read(size_type skip_rows, // Keep track of column chunk file offsets std::vector column_chunk_offsets(num_chunks); - // if there are lists present, we need to preprocess - bool has_lists = false; - // Initialize column chunk information size_t total_decompressed_size = 0; auto remaining_rows = num_rows; @@ -1657,9 +1667,6 @@ table_with_metadata reader::impl::read(size_type skip_rows, auto& col_meta = _metadata->get_column_metadata(rg.index, rg.source_index, col.schema_idx); auto& schema = _metadata->get_schema(col.schema_idx); - // this column contains repetition levels and will require a preprocess - if (schema.max_repetition_level > 0) { has_lists = true; } - auto [type_width, clock_rate, converted_type] = conversion_info(to_type_id(schema, _strings_to_categorical, _timestamp_type.id()), _timestamp_type.id(), @@ -1755,7 +1762,7 @@ table_with_metadata reader::impl::read(size_type skip_rows, // // - for nested schemas, output buffer offset values per-page, per nesting-level for the // purposes of decoding. - preprocess_columns(chunks, pages, skip_rows, num_rows, uses_custom_row_bounds, has_lists); + preprocess_columns(chunks, pages, skip_rows, num_rows, uses_custom_row_bounds); // decoding of column data itself decode_page_data(chunks, pages, page_nesting_info, skip_rows, num_rows); diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index e1f275bb8e8..6c3e05b4264 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -148,7 +148,7 @@ class reader::impl { hostdevice_vector& page_nesting_info); /** - * @brief Preprocess column information for nested schemas. + * @brief Preprocess column information and allocate output buffers. * * There are several pieces of information we can't compute directly from row counts in * the parquet headers when dealing with nested schemas. @@ -163,15 +163,13 @@ class reader::impl { * @param total_rows Maximum number of rows to read * @param uses_custom_row_bounds Whether or not num_rows and min_rows represents user-specific * bounds - * @param has_lists Whether or not this data contains lists and requires * a preprocess. 
*/ void preprocess_columns(hostdevice_vector& chunks, hostdevice_vector& pages, size_t min_row, size_t total_rows, - bool uses_custom_row_bounds, - bool has_lists); + bool uses_custom_row_bounds); /** * @brief Converts the page data and outputs to columns. diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index c5000bc0add..8a6ccdc9fbc 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -114,7 +114,7 @@ std::unique_ptr create_compressible_fixed_table(cudf::size_type num // this function replicates the "list_gen" function in // python/cudf/cudf/tests/test_parquet.py template -std::unique_ptr make_parquet_list_col( +std::unique_ptr make_parquet_list_list_col( int skip_rows, int num_rows, int lists_per_row, int list_size, bool include_validity) { auto valids = @@ -2219,8 +2219,8 @@ TYPED_TEST(ParquetChunkedWriterNumericTypeTest, UnalignedSize) bool mask[] = {false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, true}; + true, true, true, true, true, true, true, true, true}; T c1a[num_els]; std::fill(c1a, c1a + num_els, static_cast(5)); T c1b[num_els]; @@ -2608,6 +2608,89 @@ TEST_F(ParquetReaderTest, UserBoundsWithNulls) } } +TEST_F(ParquetReaderTest, UserBoundsWithNullsMixedTypes) +{ + constexpr int num_rows = 32 * 1024; + + std::mt19937 gen(6542); + std::bernoulli_distribution bn(0.7f); + auto valids = + cudf::detail::make_counting_transform_iterator(0, [&](int index) { return bn(gen); }); + auto values = thrust::make_counting_iterator(0); + + // int64 + cudf::test::fixed_width_column_wrapper c0(values, values + num_rows, valids); + + // list + constexpr int floats_per_row = 4; + auto c1_offset_iter = cudf::detail::make_counting_transform_iterator( + 0, [floats_per_row](cudf::size_type idx) { return idx * floats_per_row; }); + cudf::test::fixed_width_column_wrapper c1_offsets( + c1_offset_iter, c1_offset_iter + num_rows + 1); + cudf::test::fixed_width_column_wrapper c1_floats( + values, values + (num_rows * floats_per_row), valids); + auto c1 = cudf::make_lists_column(num_rows, + c1_offsets.release(), + c1_floats.release(), + cudf::UNKNOWN_NULL_COUNT, + cudf::test::detail::make_null_mask(valids, valids + num_rows)); + + // list> + auto c2 = make_parquet_list_list_col(0, num_rows, 5, 8, true); + + // struct, int, float> + std::vector strings{ + "abc", "x", "bananas", "gpu", "minty", "backspace", "", "cayenne", "turbine", "soft"}; + std::uniform_int_distribution uni(0, strings.size() - 1); + auto string_iter = cudf::detail::make_counting_transform_iterator( + 0, [&](cudf::size_type idx) { return strings[uni(gen)]; }); + constexpr int string_per_row = 3; + constexpr int num_string_rows = num_rows * string_per_row; + cudf::test::strings_column_wrapper string_col{string_iter, string_iter + num_string_rows}; + auto offset_iter = cudf::detail::make_counting_transform_iterator( + 0, [string_per_row](cudf::size_type idx) { return idx * string_per_row; }); + cudf::test::fixed_width_column_wrapper offsets(offset_iter, + offset_iter + num_rows + 1); + auto c3_list = + cudf::make_lists_column(num_rows, + offsets.release(), + string_col.release(), + cudf::UNKNOWN_NULL_COUNT, + cudf::test::detail::make_null_mask(valids, valids + num_rows)); + cudf::test::fixed_width_column_wrapper c3_ints(values, values + num_rows, valids); + cudf::test::fixed_width_column_wrapper c3_floats(values, values + num_rows, valids); + std::vector> 
c3_children; + c3_children.push_back(std::move(c3_list)); + c3_children.push_back(c3_ints.release()); + c3_children.push_back(c3_floats.release()); + cudf::test::structs_column_wrapper c3(std::move(c3_children)); + + // write it out + cudf::table_view tbl({c0, *c1, *c2, c3}); + auto filepath = temp_env->get_temp_filepath("UserBoundsWithNullsMixedTypes.parquet"); + cudf_io::parquet_writer_options out_args = + cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, tbl); + cudf_io::write_parquet(out_args); + + // read it back + std::vector> params{ + {-1, -1}, {0, num_rows}, {1, num_rows - 1}, {num_rows - 1, 1}, {517, 22000}}; + for (auto p : params) { + cudf_io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf_io::source_info{filepath}); + if (p.first >= 0) { read_args.set_skip_rows(p.first); } + if (p.second >= 0) { read_args.set_num_rows(p.second); } + auto result = cudf_io::read_parquet(read_args); + + p.first = p.first < 0 ? 0 : p.first; + p.second = p.second < 0 ? num_rows - p.first : p.second; + std::vector slice_indices{p.first, p.first + p.second}; + auto expected = cudf::slice(tbl, slice_indices); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, expected[0]); + } +} + TEST_F(ParquetReaderTest, UserBoundsWithNullsLarge) { constexpr int num_rows = 30 * 1000000; @@ -2655,7 +2738,7 @@ TEST_F(ParquetReaderTest, UserBoundsWithNullsLarge) TEST_F(ParquetReaderTest, ListUserBoundsWithNullsLarge) { constexpr int num_rows = 5 * 1000000; - auto colp = make_parquet_list_col(0, num_rows, 5, 8, true); + auto colp = make_parquet_list_list_col(0, num_rows, 5, 8, true); cudf::column_view col = *colp; // this file will have row groups of 1,000,000 each From eadfd63c569445f3885f14922645b1e747f21ed1 Mon Sep 17 00:00:00 2001 From: Dave Baranec Date: Tue, 27 Sep 2022 17:50:10 -0500 Subject: [PATCH 002/162] Fixed an issue with the tests: input columns cannot have unsanitary lists. Fixed an additional issue in the decoding where flat column types underneath structs could end up ignoring skip_rows/num_rows. --- cpp/src/io/parquet/page_data.cu | 16 +++++++++++----- cpp/tests/io/parquet_test.cpp | 19 +++++++++++-------- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index bdf6e6819cd..107c54b6872 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -1175,7 +1175,8 @@ static __device__ void gpuUpdateValidityOffsetsAndRowIndices(int32_t target_inpu int t) { // max nesting depth of the column - int const max_depth = s->col.max_nesting_depth; + int const max_depth = s->col.max_nesting_depth; + bool const has_repetition = s->col.max_level[level_type::REPETITION] > 0; // how many (input) values we've processed in the page so far int input_value_count = s->input_value_count; // how many rows we've processed in the page so far @@ -1235,7 +1236,7 @@ static __device__ void gpuUpdateValidityOffsetsAndRowIndices(int32_t target_inpu uint32_t const warp_valid_mask = // for flat schemas, a simple ballot_sync gives us the correct count and bit positions // because every value in the input matches to a value in the output - max_depth == 1 + !has_repetition ? ballot(is_valid) : // for nested schemas, it's more complicated. This warp will visit 32 incoming values, @@ -1284,11 +1285,12 @@ static __device__ void gpuUpdateValidityOffsetsAndRowIndices(int32_t target_inpu // the correct position to start reading. 
since we are about to write the validity vector here // we need to adjust our computed mask to take into account the write row bounds. int const in_write_row_bounds = - max_depth == 1 + !has_repetition ? thread_row_index >= s->first_row && thread_row_index < (s->first_row + s->num_rows) : in_row_bounds; int const first_thread_in_write_range = - max_depth == 1 ? __ffs(ballot(in_write_row_bounds)) - 1 : 0; + !has_repetition ? __ffs(ballot(in_write_row_bounds)) - 1 : 0; + // # of bits to of the validity mask to write out int const warp_valid_mask_bit_count = first_thread_in_write_range < 0 ? 0 : warp_value_count - first_thread_in_write_range; @@ -1572,6 +1574,8 @@ __global__ void __launch_bounds__(block_size) gpuDecodePageData( ((s->col.data_type & 7) == BOOLEAN || (s->col.data_type & 7) == BYTE_ARRAY) ? 64 : 32; } + bool const has_repetition = s->col.max_level[level_type::REPETITION] > 0; + // skipped_leaf_values will always be 0 for flat hierarchies. uint32_t skipped_leaf_values = s->page.skipped_leaf_values; while (!s->error && (s->input_value_count < s->num_input_values || s->src_pos < s->nz_count)) { @@ -1624,7 +1628,7 @@ __global__ void __launch_bounds__(block_size) gpuDecodePageData( // - so we will end up ignoring the first two input rows, and input rows 2..n will // get written to the output starting at position 0. // - if (s->col.max_nesting_depth == 1) { dst_pos -= s->first_row; } + if (!has_repetition) { dst_pos -= s->first_row; } // target_pos will always be properly bounded by num_rows, but dst_pos may be negative (values // before first_row) in the flat hierarchy case. @@ -1764,6 +1768,8 @@ void PreprocessColumnData(hostdevice_vector& pages, // computes: // PageInfo::chunk_row for all pages + // Note: this is doing some redundant work for pages in flat hierarchies. chunk_row has already + // been computed during header decoding. the overall amount of work here is very small though. 
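+  // As a concrete example: for a chunk whose pages contain {100, 250, 80} rows, the computed
+  // chunk_row values are {0, 100, 350}, i.e. the starting row of each page within its chunk.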
auto key_input = thrust::make_transform_iterator( pages.device_ptr(), [] __device__(PageInfo const& page) { return page.chunk_idx; }); auto page_input = thrust::make_transform_iterator( diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index 8a6ccdc9fbc..b41452459b0 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -2629,11 +2629,12 @@ TEST_F(ParquetReaderTest, UserBoundsWithNullsMixedTypes) c1_offset_iter, c1_offset_iter + num_rows + 1); cudf::test::fixed_width_column_wrapper c1_floats( values, values + (num_rows * floats_per_row), valids); - auto c1 = cudf::make_lists_column(num_rows, - c1_offsets.release(), - c1_floats.release(), - cudf::UNKNOWN_NULL_COUNT, - cudf::test::detail::make_null_mask(valids, valids + num_rows)); + auto _c1 = cudf::make_lists_column(num_rows, + c1_offsets.release(), + c1_floats.release(), + cudf::UNKNOWN_NULL_COUNT, + cudf::test::detail::make_null_mask(valids, valids + num_rows)); + auto c1 = cudf::purge_nonempty_nulls(static_cast(*_c1)); // list> auto c2 = make_parquet_list_list_col(0, num_rows, 5, 8, true); @@ -2651,22 +2652,24 @@ TEST_F(ParquetReaderTest, UserBoundsWithNullsMixedTypes) 0, [string_per_row](cudf::size_type idx) { return idx * string_per_row; }); cudf::test::fixed_width_column_wrapper offsets(offset_iter, offset_iter + num_rows + 1); - auto c3_list = + auto _c3_list = cudf::make_lists_column(num_rows, offsets.release(), string_col.release(), cudf::UNKNOWN_NULL_COUNT, cudf::test::detail::make_null_mask(valids, valids + num_rows)); + auto c3_list = cudf::purge_nonempty_nulls(static_cast(*_c3_list)); cudf::test::fixed_width_column_wrapper c3_ints(values, values + num_rows, valids); cudf::test::fixed_width_column_wrapper c3_floats(values, values + num_rows, valids); std::vector> c3_children; c3_children.push_back(std::move(c3_list)); c3_children.push_back(c3_ints.release()); c3_children.push_back(c3_floats.release()); - cudf::test::structs_column_wrapper c3(std::move(c3_children)); + cudf::test::structs_column_wrapper _c3(std::move(c3_children)); + auto c3 = cudf::purge_nonempty_nulls(static_cast(_c3)); // write it out - cudf::table_view tbl({c0, *c1, *c2, c3}); + cudf::table_view tbl({c0, *c1, *c2, *c3}); auto filepath = temp_env->get_temp_filepath("UserBoundsWithNullsMixedTypes.parquet"); cudf_io::parquet_writer_options out_args = cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, tbl); From 222c9fe3979cbb28556e56b1727354cda7640ee4 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 30 Sep 2022 15:26:27 -0700 Subject: [PATCH 003/162] Copy `parquet_reader_*` into `chunked_parquet_reader_*` Signed-off-by: Nghia Truong --- cpp/include/cudf/io/parquet.hpp | 337 ++++++++++++++++++++++++++++++++ 1 file changed, 337 insertions(+) diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index ff5b9f5c457..f91d583cb76 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -400,6 +400,343 @@ table_with_metadata read_parquet( parquet_reader_options const& options, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +// chunked reader stuff +class chunked_parquet_reader_options_builder; + +/** + * @brief Settings for `read_parquet()`. 
+ */ +class chunked_parquet_reader_options { + source_info _source; + + // Path in schema of column to read; `nullopt` is all + std::optional> _columns; + + // List of individual row groups to read (ignored if empty) + std::vector> _row_groups; + // Number of rows to skip from the start + size_type _skip_rows = 0; + // Number of rows to read; -1 is all + size_type _num_rows = -1; + + // Whether to store string data as categorical type + bool _convert_strings_to_categories = false; + // Whether to use PANDAS metadata to load columns + bool _use_pandas_metadata = true; + // Cast timestamp columns to a specific type + data_type _timestamp_type{type_id::EMPTY}; + + std::optional> _reader_column_schema; + + /** + * @brief Constructor from source info. + * + * @param src source information used to read parquet file + */ + explicit chunked_parquet_reader_options(source_info const& src) : _source(src) {} + + friend chunked_parquet_reader_options_builder; + + public: + /** + * @brief Default constructor. + * + * This has been added since Cython requires a default constructor to create objects on stack. + */ + explicit chunked_parquet_reader_options() = default; + + /** + * @brief Creates a parquet_reader_options_builder which will build parquet_reader_options. + * + * @param src Source information to read parquet file + * @return Builder to build reader options + */ + static chunked_parquet_reader_options_builder builder(source_info const& src); + + /** + * @brief Returns source info. + * + * @return Source info + */ + [[nodiscard]] source_info const& get_source() const { return _source; } + + /** + * @brief Returns true/false depending on whether strings should be converted to categories or + * not. + * + * @return `true` if strings should be converted to categories + */ + [[nodiscard]] bool is_enabled_convert_strings_to_categories() const + { + return _convert_strings_to_categories; + } + + /** + * @brief Returns true/false depending whether to use pandas metadata or not while reading. + * + * @return `true` if pandas metadata is used while reading + */ + [[nodiscard]] bool is_enabled_use_pandas_metadata() const { return _use_pandas_metadata; } + + /** + * @brief Returns optional tree of metadata. + * + * @return vector of reader_column_schema objects. + */ + [[nodiscard]] std::optional> get_column_schema() const + { + return _reader_column_schema; + } + + /** + * @brief Returns number of rows to skip from the start. + * + * @return Number of rows to skip from the start + */ + [[nodiscard]] size_type get_skip_rows() const { return _skip_rows; } + + /** + * @brief Returns number of rows to read. + * + * @return Number of rows to read + */ + [[nodiscard]] size_type get_num_rows() const { return _num_rows; } + + /** + * @brief Returns names of column to be read, if set. + * + * @return Names of column to be read; `nullopt` if the option is not set + */ + [[nodiscard]] auto const& get_columns() const { return _columns; } + + /** + * @brief Returns list of individual row groups to be read. + * + * @return List of individual row groups to be read + */ + [[nodiscard]] auto const& get_row_groups() const { return _row_groups; } + + /** + * @brief Returns timestamp type used to cast timestamp columns. + * + * @return Timestamp type used to cast timestamp columns + */ + data_type get_timestamp_type() const { return _timestamp_type; } + + /** + * @brief Sets names of the columns to be read. 
+ * + * @param col_names Vector of column names + */ + void set_columns(std::vector col_names) { _columns = std::move(col_names); } + + /** + * @brief Sets vector of individual row groups to read. + * + * @param row_groups Vector of row groups to read + */ + void set_row_groups(std::vector> row_groups) + { + if ((!row_groups.empty()) and ((_skip_rows != 0) or (_num_rows != -1))) { + CUDF_FAIL("row_groups can't be set along with skip_rows and num_rows"); + } + + _row_groups = std::move(row_groups); + } + + /** + * @brief Sets to enable/disable conversion of strings to categories. + * + * @param val Boolean value to enable/disable conversion of string columns to categories + */ + void enable_convert_strings_to_categories(bool val) { _convert_strings_to_categories = val; } + + /** + * @brief Sets to enable/disable use of pandas metadata to read. + * + * @param val Boolean value whether to use pandas metadata + */ + void enable_use_pandas_metadata(bool val) { _use_pandas_metadata = val; } + + /** + * @brief Sets reader column schema. + * + * @param val Tree of schema nodes to enable/disable conversion of binary to string columns. + * Note default is to convert to string columns. + */ + void set_column_schema(std::vector val) + { + _reader_column_schema = std::move(val); + } + + /** + * @brief Sets number of rows to skip. + * + * @param val Number of rows to skip from start + */ + void set_skip_rows(size_type val) + { + if ((val != 0) and (!_row_groups.empty())) { + CUDF_FAIL("skip_rows can't be set along with a non-empty row_groups"); + } + + _skip_rows = val; + } + + /** + * @brief Sets number of rows to read. + * + * @param val Number of rows to read after skip + */ + void set_num_rows(size_type val) + { + if ((val != -1) and (!_row_groups.empty())) { + CUDF_FAIL("num_rows can't be set along with a non-empty row_groups"); + } + + _num_rows = val; + } + + /** + * @brief Sets timestamp_type used to cast timestamp columns. + * + * @param type The timestamp data_type to which all timestamp columns need to be cast + */ + void set_timestamp_type(data_type type) { _timestamp_type = type; } +}; + +/** + * @brief Builds parquet_reader_options to use for `read_parquet()`. + */ +class chunked_parquet_reader_options_builder { + chunked_parquet_reader_options options; + + public: + /** + * @brief Default constructor. + * + * This has been added since Cython requires a default constructor to create objects on stack. + */ + chunked_parquet_reader_options_builder() = default; + + /** + * @brief Constructor from source info. + * + * @param src The source information used to read parquet file + */ + explicit chunked_parquet_reader_options_builder(source_info const& src) : options(src) {} + + /** + * @brief Sets names of the columns to be read. + * + * @param col_names Vector of column names + * @return this for chaining + */ + chunked_parquet_reader_options_builder& columns(std::vector col_names) + { + options._columns = std::move(col_names); + return *this; + } + + /** + * @brief Sets vector of individual row groups to read. + * + * @param row_groups Vector of row groups to read + * @return this for chaining + */ + chunked_parquet_reader_options_builder& row_groups(std::vector> row_groups) + { + options.set_row_groups(std::move(row_groups)); + return *this; + } + + /** + * @brief Sets enable/disable conversion of strings to categories. 
+ * + * @param val Boolean value to enable/disable conversion of string columns to categories + * @return this for chaining + */ + chunked_parquet_reader_options_builder& convert_strings_to_categories(bool val) + { + options._convert_strings_to_categories = val; + return *this; + } + + /** + * @brief Sets to enable/disable use of pandas metadata to read. + * + * @param val Boolean value whether to use pandas metadata + * @return this for chaining + */ + chunked_parquet_reader_options_builder& use_pandas_metadata(bool val) + { + options._use_pandas_metadata = val; + return *this; + } + + /** + * @brief Sets reader metadata. + * + * @param val Tree of metadata information. + * @return this for chaining + */ + chunked_parquet_reader_options_builder& set_column_schema(std::vector val) + { + options._reader_column_schema = std::move(val); + return *this; + } + + /** + * @brief Sets number of rows to skip. + * + * @param val Number of rows to skip from start + * @return this for chaining + */ + chunked_parquet_reader_options_builder& skip_rows(size_type val) + { + options.set_skip_rows(val); + return *this; + } + + /** + * @brief Sets number of rows to read. + * + * @param val Number of rows to read after skip + * @return this for chaining + */ + chunked_parquet_reader_options_builder& num_rows(size_type val) + { + options.set_num_rows(val); + return *this; + } + + /** + * @brief timestamp_type used to cast timestamp columns. + * + * @param type The timestamp data_type to which all timestamp columns need to be cast + * @return this for chaining + */ + chunked_parquet_reader_options_builder& timestamp_type(data_type type) + { + options._timestamp_type = type; + return *this; + } + + /** + * @brief move parquet_reader_options member once it's built. + */ + operator chunked_parquet_reader_options&&() { return std::move(options); } + + /** + * @brief move parquet_reader_options member once it's built. + * + * This has been added since Cython does not support overloading of conversion operators. + * + * @return Built `parquet_reader_options` object's r-value reference + */ + chunked_parquet_reader_options&& build() { return std::move(options); } +}; + /** @} */ // end of group /** * @addtogroup io_writers From f49cfed3d8b3ba80f60f93966c8049fd923224af Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 30 Sep 2022 15:45:29 -0700 Subject: [PATCH 004/162] Modify `chunked_parquet_reader_options` Signed-off-by: Nghia Truong --- cpp/include/cudf/io/parquet.hpp | 199 ++------------------------------ 1 file changed, 12 insertions(+), 187 deletions(-) diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index f91d583cb76..434207f144e 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -404,205 +404,30 @@ table_with_metadata read_parquet( class chunked_parquet_reader_options_builder; /** - * @brief Settings for `read_parquet()`. + * @brief Settings for `chunked_parquet_reader`. 
*/ -class chunked_parquet_reader_options { - source_info _source; - - // Path in schema of column to read; `nullopt` is all - std::optional> _columns; - - // List of individual row groups to read (ignored if empty) - std::vector> _row_groups; - // Number of rows to skip from the start - size_type _skip_rows = 0; - // Number of rows to read; -1 is all - size_type _num_rows = -1; - - // Whether to store string data as categorical type - bool _convert_strings_to_categories = false; - // Whether to use PANDAS metadata to load columns - bool _use_pandas_metadata = true; - // Cast timestamp columns to a specific type - data_type _timestamp_type{type_id::EMPTY}; - - std::optional> _reader_column_schema; - - /** - * @brief Constructor from source info. - * - * @param src source information used to read parquet file - */ - explicit chunked_parquet_reader_options(source_info const& src) : _source(src) {} +class chunked_parquet_reader_options : public parquet_reader_options { + // Limit the number of maximum bytes that chunked_parquet_reader will read each time. + std::size_t _byte_limit; friend chunked_parquet_reader_options_builder; public: /** - * @brief Default constructor. - * - * This has been added since Cython requires a default constructor to create objects on stack. - */ - explicit chunked_parquet_reader_options() = default; - - /** - * @brief Creates a parquet_reader_options_builder which will build parquet_reader_options. - * - * @param src Source information to read parquet file - * @return Builder to build reader options - */ - static chunked_parquet_reader_options_builder builder(source_info const& src); - - /** - * @brief Returns source info. - * - * @return Source info - */ - [[nodiscard]] source_info const& get_source() const { return _source; } - - /** - * @brief Returns true/false depending on whether strings should be converted to categories or - * not. - * - * @return `true` if strings should be converted to categories - */ - [[nodiscard]] bool is_enabled_convert_strings_to_categories() const - { - return _convert_strings_to_categories; - } - - /** - * @brief Returns true/false depending whether to use pandas metadata or not while reading. - * - * @return `true` if pandas metadata is used while reading - */ - [[nodiscard]] bool is_enabled_use_pandas_metadata() const { return _use_pandas_metadata; } - - /** - * @brief Returns optional tree of metadata. - * - * @return vector of reader_column_schema objects. - */ - [[nodiscard]] std::optional> get_column_schema() const - { - return _reader_column_schema; - } - - /** - * @brief Returns number of rows to skip from the start. + * @brief Return the maximum number of bytes that will be read by + * `chunked_parquet_reader::read_next()`. * - * @return Number of rows to skip from the start + * @return Number of maximum bytes to read */ - [[nodiscard]] size_type get_skip_rows() const { return _skip_rows; } + [[nodiscard]] size_type get_byte_limit() const { return _byte_limit; } /** - * @brief Returns number of rows to read. + * @brief Sets the maximum number of bytes that will be read by + * `chunked_parquet_reader::read_next()`. * - * @return Number of rows to read + * @param byte_limit Number of maximum bytes to read */ - [[nodiscard]] size_type get_num_rows() const { return _num_rows; } - - /** - * @brief Returns names of column to be read, if set. 
- * - * @return Names of column to be read; `nullopt` if the option is not set - */ - [[nodiscard]] auto const& get_columns() const { return _columns; } - - /** - * @brief Returns list of individual row groups to be read. - * - * @return List of individual row groups to be read - */ - [[nodiscard]] auto const& get_row_groups() const { return _row_groups; } - - /** - * @brief Returns timestamp type used to cast timestamp columns. - * - * @return Timestamp type used to cast timestamp columns - */ - data_type get_timestamp_type() const { return _timestamp_type; } - - /** - * @brief Sets names of the columns to be read. - * - * @param col_names Vector of column names - */ - void set_columns(std::vector col_names) { _columns = std::move(col_names); } - - /** - * @brief Sets vector of individual row groups to read. - * - * @param row_groups Vector of row groups to read - */ - void set_row_groups(std::vector> row_groups) - { - if ((!row_groups.empty()) and ((_skip_rows != 0) or (_num_rows != -1))) { - CUDF_FAIL("row_groups can't be set along with skip_rows and num_rows"); - } - - _row_groups = std::move(row_groups); - } - - /** - * @brief Sets to enable/disable conversion of strings to categories. - * - * @param val Boolean value to enable/disable conversion of string columns to categories - */ - void enable_convert_strings_to_categories(bool val) { _convert_strings_to_categories = val; } - - /** - * @brief Sets to enable/disable use of pandas metadata to read. - * - * @param val Boolean value whether to use pandas metadata - */ - void enable_use_pandas_metadata(bool val) { _use_pandas_metadata = val; } - - /** - * @brief Sets reader column schema. - * - * @param val Tree of schema nodes to enable/disable conversion of binary to string columns. - * Note default is to convert to string columns. - */ - void set_column_schema(std::vector val) - { - _reader_column_schema = std::move(val); - } - - /** - * @brief Sets number of rows to skip. - * - * @param val Number of rows to skip from start - */ - void set_skip_rows(size_type val) - { - if ((val != 0) and (!_row_groups.empty())) { - CUDF_FAIL("skip_rows can't be set along with a non-empty row_groups"); - } - - _skip_rows = val; - } - - /** - * @brief Sets number of rows to read. - * - * @param val Number of rows to read after skip - */ - void set_num_rows(size_type val) - { - if ((val != -1) and (!_row_groups.empty())) { - CUDF_FAIL("num_rows can't be set along with a non-empty row_groups"); - } - - _num_rows = val; - } - - /** - * @brief Sets timestamp_type used to cast timestamp columns. - * - * @param type The timestamp data_type to which all timestamp columns need to be cast - */ - void set_timestamp_type(data_type type) { _timestamp_type = type; } + void set_byte_limit(std::size_t byte_limit) { _byte_limit = byte_limit; } }; /** From dd39804cc941a3681aa588efdadb02b21ed761e4 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 4 Oct 2022 12:27:54 -0700 Subject: [PATCH 005/162] Exploit inheritance to extend the options and options_builder classes Signed-off-by: Nghia Truong --- cpp/include/cudf/io/parquet.hpp | 119 +++++++------------------------- 1 file changed, 25 insertions(+), 94 deletions(-) diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index 434207f144e..0147f341042 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -50,6 +50,7 @@ class parquet_reader_options_builder; * @brief Settings for `read_parquet()`. 
*/ class parquet_reader_options { + protected: source_info _source; // Path in schema of column to read; `nullopt` is all @@ -252,6 +253,7 @@ class parquet_reader_options { * @brief Builds parquet_reader_options to use for `read_parquet()`. */ class parquet_reader_options_builder { + protected: parquet_reader_options options; public: @@ -433,8 +435,18 @@ class chunked_parquet_reader_options : public parquet_reader_options { /** * @brief Builds parquet_reader_options to use for `read_parquet()`. */ -class chunked_parquet_reader_options_builder { - chunked_parquet_reader_options options; +class chunked_parquet_reader_options_builder : public parquet_reader_options_builder { + // Limit the number of maximum bytes that chunked_parquet_reader will read each time. + // This will be passed into the private instance of `chunked_parquet_reader_options`. + std::size_t _byte_limit; + + chunked_parquet_reader_options create_options() + { + auto chunked_reader_options = chunked_parquet_reader_options{}; + dynamic_cast(chunked_reader_options) = std::move(options); + chunked_reader_options.set_byte_limit(_byte_limit); + return chunked_reader_options; + } public: /** @@ -449,117 +461,36 @@ class chunked_parquet_reader_options_builder { * * @param src The source information used to read parquet file */ - explicit chunked_parquet_reader_options_builder(source_info const& src) : options(src) {} - - /** - * @brief Sets names of the columns to be read. - * - * @param col_names Vector of column names - * @return this for chaining - */ - chunked_parquet_reader_options_builder& columns(std::vector col_names) - { - options._columns = std::move(col_names); - return *this; - } - - /** - * @brief Sets vector of individual row groups to read. - * - * @param row_groups Vector of row groups to read - * @return this for chaining - */ - chunked_parquet_reader_options_builder& row_groups(std::vector> row_groups) - { - options.set_row_groups(std::move(row_groups)); - return *this; - } - - /** - * @brief Sets enable/disable conversion of strings to categories. - * - * @param val Boolean value to enable/disable conversion of string columns to categories - * @return this for chaining - */ - chunked_parquet_reader_options_builder& convert_strings_to_categories(bool val) + explicit chunked_parquet_reader_options_builder(source_info const& src) + : parquet_reader_options_builder(src) { - options._convert_strings_to_categories = val; - return *this; } /** - * @brief Sets to enable/disable use of pandas metadata to read. + * @brief Sets number of byte limit to read each time. * - * @param val Boolean value whether to use pandas metadata - * @return this for chaining - */ - chunked_parquet_reader_options_builder& use_pandas_metadata(bool val) - { - options._use_pandas_metadata = val; - return *this; - } - - /** - * @brief Sets reader metadata. - * - * @param val Tree of metadata information. - * @return this for chaining - */ - chunked_parquet_reader_options_builder& set_column_schema(std::vector val) - { - options._reader_column_schema = std::move(val); - return *this; - } - - /** - * @brief Sets number of rows to skip. - * - * @param val Number of rows to skip from start - * @return this for chaining - */ - chunked_parquet_reader_options_builder& skip_rows(size_type val) - { - options.set_skip_rows(val); - return *this; - } - - /** - * @brief Sets number of rows to read. 
- * - * @param val Number of rows to read after skip - * @return this for chaining - */ - chunked_parquet_reader_options_builder& num_rows(size_type val) - { - options.set_num_rows(val); - return *this; - } - - /** - * @brief timestamp_type used to cast timestamp columns. - * - * @param type The timestamp data_type to which all timestamp columns need to be cast + * @param byte_limit Number of maximum bytes to read * @return this for chaining */ - chunked_parquet_reader_options_builder& timestamp_type(data_type type) + chunked_parquet_reader_options_builder& byte_limit(std::size_t byte_limit) { - options._timestamp_type = type; + _byte_limit = byte_limit; return *this; } /** - * @brief move parquet_reader_options member once it's built. + * @brief Return `chunked_parquet_reader_options` instance once this's built. */ - operator chunked_parquet_reader_options&&() { return std::move(options); } + operator chunked_parquet_reader_options() { return create_options(); } /** - * @brief move parquet_reader_options member once it's built. + * @brief Return `chunked_parquet_reader_options` instance once this's built. * * This has been added since Cython does not support overloading of conversion operators. * - * @return Built `parquet_reader_options` object's r-value reference + * @return Built `chunked_parquet_reader_options` object's r-value reference */ - chunked_parquet_reader_options&& build() { return std::move(options); } + chunked_parquet_reader_options build() { return create_options(); } }; /** @} */ // end of group From 81bc68f9181e23c1fecb1c21d6f8f92b178bea80 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 4 Oct 2022 17:34:43 -0700 Subject: [PATCH 006/162] Remove unnecessary variable Signed-off-by: Nghia Truong --- cpp/include/cudf/io/parquet.hpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index 0147f341042..8dbc05a792b 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -436,15 +436,11 @@ class chunked_parquet_reader_options : public parquet_reader_options { * @brief Builds parquet_reader_options to use for `read_parquet()`. */ class chunked_parquet_reader_options_builder : public parquet_reader_options_builder { - // Limit the number of maximum bytes that chunked_parquet_reader will read each time. - // This will be passed into the private instance of `chunked_parquet_reader_options`. - std::size_t _byte_limit; + chunked_parquet_reader_options chunked_reader_options{}; chunked_parquet_reader_options create_options() { - auto chunked_reader_options = chunked_parquet_reader_options{}; dynamic_cast(chunked_reader_options) = std::move(options); - chunked_reader_options.set_byte_limit(_byte_limit); return chunked_reader_options; } @@ -469,12 +465,12 @@ class chunked_parquet_reader_options_builder : public parquet_reader_options_bui /** * @brief Sets number of byte limit to read each time. 
* - * @param byte_limit Number of maximum bytes to read + * @param limit Number of maximum bytes to read per `read_next()` call * @return this for chaining */ - chunked_parquet_reader_options_builder& byte_limit(std::size_t byte_limit) + chunked_parquet_reader_options_builder& byte_limit(std::size_t limit) { - _byte_limit = byte_limit; + chunked_reader_options.set_byte_limit(limit); return *this; } From f8126befc75bb3c9cc7cc8d1d6edf8d2fe56edf9 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 4 Oct 2022 17:35:00 -0700 Subject: [PATCH 007/162] Misc Signed-off-by: Nghia Truong --- cpp/include/cudf/io/parquet.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index 8dbc05a792b..8d8bfebf0ca 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -412,7 +412,7 @@ class chunked_parquet_reader_options : public parquet_reader_options { // Limit the number of maximum bytes that chunked_parquet_reader will read each time. std::size_t _byte_limit; - friend chunked_parquet_reader_options_builder; + friend class chunked_parquet_reader_options_builder; public: /** From 0e7692c27896703c7ad2b2a599bebdd052696771 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 4 Oct 2022 21:02:45 -0700 Subject: [PATCH 008/162] Add docs Signed-off-by: Nghia Truong --- cpp/include/cudf/io/parquet.hpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index 8d8bfebf0ca..cb174ec7c2b 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -438,6 +438,12 @@ class chunked_parquet_reader_options : public parquet_reader_options { class chunked_parquet_reader_options_builder : public parquet_reader_options_builder { chunked_parquet_reader_options chunked_reader_options{}; + /** + * @brief Create a `chunked_parquet_reader_options` object. + * + * The returned object is a result of taking over the ownership of the internal states. + * Therefore, this should be called at most once. + */ chunked_parquet_reader_options create_options() { dynamic_cast(chunked_reader_options) = std::move(options); From 9f9eeb06bb526e697f6747a6f7f35d4a8a793061 Mon Sep 17 00:00:00 2001 From: Dave Baranec Date: Wed, 5 Oct 2022 10:31:33 -0500 Subject: [PATCH 009/162] PR feedback changes. --- cpp/src/io/parquet/page_data.cu | 2 +- cpp/src/io/parquet/reader_impl.cu | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index 107c54b6872..a5f6d737637 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -1482,7 +1482,7 @@ __global__ void __launch_bounds__(block_size) // we only need to preprocess hierarchies with repetition in them (ie, hierarchies // containing lists anywhere within). - bool has_repetition = chunks[pp->chunk_idx].max_level[level_type::REPETITION] > 0; + bool const has_repetition = chunks[pp->chunk_idx].max_level[level_type::REPETITION] > 0; if (!has_repetition) { return; } if (!setupLocalPageInfo(s, pp, chunks, trim_pass ? min_row : 0, trim_pass ? num_rows : INT_MAX)) { diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 8be0b388f0e..07869189089 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1360,11 +1360,11 @@ void reader::impl::preprocess_columns(hostdevice_vector& c // if we have any list columns that need further processing. 
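   // For example, a flat struct<int, float> column is fully allocated right here (every level is
   // simply total_rows long), while for a list<float> column only the outer offsets can be sized
   // in this loop; the float child carries PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT and is
   // sized later by gpu::PreprocessColumnData.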
bool has_lists = false; for (size_t idx = 0; idx < _input_columns.size(); idx++) { - auto const& input_col = _input_columns[idx]; - size_t max_depth = input_col.nesting_depth(); + auto const& input_col = _input_columns[idx]; + size_t const max_depth = input_col.nesting_depth(); auto* cols = &_output_columns; - for (size_t l_idx = 0; l_idx < input_col.nesting_depth(); l_idx++) { + for (size_t l_idx = 0; l_idx < max_depth; l_idx++) { auto& out_buf = (*cols)[input_col.nesting[l_idx]]; cols = &out_buf.children; From d2e409ab68ce580c16cdebe64df343005c49f6ec Mon Sep 17 00:00:00 2001 From: Dave Baranec Date: Wed, 5 Oct 2022 14:03:53 -0500 Subject: [PATCH 010/162] Fixed some compile errors from merging. --- cpp/tests/io/parquet_test.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index 906fb1c388b..134eff54144 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -2652,19 +2652,19 @@ TEST_F(ParquetReaderTest, UserBoundsWithNullsMixedTypes) // write it out cudf::table_view tbl({c0, *c1, *c2, *c3}); auto filepath = temp_env->get_temp_filepath("UserBoundsWithNullsMixedTypes.parquet"); - cudf_io::parquet_writer_options out_args = - cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, tbl); - cudf_io::write_parquet(out_args); + cudf::io::parquet_writer_options out_args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl); + cudf::io::write_parquet(out_args); // read it back std::vector> params{ {-1, -1}, {0, num_rows}, {1, num_rows - 1}, {num_rows - 1, 1}, {517, 22000}}; for (auto p : params) { - cudf_io::parquet_reader_options read_args = - cudf::io::parquet_reader_options::builder(cudf_io::source_info{filepath}); + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); if (p.first >= 0) { read_args.set_skip_rows(p.first); } if (p.second >= 0) { read_args.set_num_rows(p.second); } - auto result = cudf_io::read_parquet(read_args); + auto result = cudf::io::read_parquet(read_args); p.first = p.first < 0 ? 0 : p.first; p.second = p.second < 0 ? num_rows - p.first : p.second; From ed41ac1bc35bcfe56e1b1305555bb0a705bbe548 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 4 Oct 2022 21:07:10 -0700 Subject: [PATCH 011/162] Add `chunked_parquet_reader` Signed-off-by: Nghia Truong --- cpp/include/cudf/io/parquet.hpp | 89 ++++++++++++++++++++++++++++++--- 1 file changed, 82 insertions(+), 7 deletions(-) diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index cb174ec7c2b..176aff92b0d 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -417,37 +417,42 @@ class chunked_parquet_reader_options : public parquet_reader_options { public: /** * @brief Return the maximum number of bytes that will be read by - * `chunked_parquet_reader::read_next()`. + * `chunked_parquet_reader::read()`. * - * @return Number of maximum bytes to read + * @return Number of maximum bytes to read each time */ [[nodiscard]] size_type get_byte_limit() const { return _byte_limit; } /** * @brief Sets the maximum number of bytes that will be read by - * `chunked_parquet_reader::read_next()`. + * `chunked_parquet_reader::read()`. 
* - * @param byte_limit Number of maximum bytes to read + * @param byte_limit Number of maximum bytes to read each time */ void set_byte_limit(std::size_t byte_limit) { _byte_limit = byte_limit; } }; /** - * @brief Builds parquet_reader_options to use for `read_parquet()`. + * @brief Builds a `chunked_parquet_reader_options` instance to use with `chunked_parquet_reader` + * class. */ class chunked_parquet_reader_options_builder : public parquet_reader_options_builder { chunked_parquet_reader_options chunked_reader_options{}; + bool options_built{false}; /** * @brief Create a `chunked_parquet_reader_options` object. * * The returned object is a result of taking over the ownership of the internal states. - * Therefore, this should be called at most once. + * Therefore, this should be called at most once to avoid data corruption. */ chunked_parquet_reader_options create_options() { + CUDF_EXPECTS(!options_built, "This function should not be called more than once"); + options_built = true; + dynamic_cast(chunked_reader_options) = std::move(options); - return chunked_reader_options; + return std::move(chunked_reader_options); } public: @@ -495,6 +500,76 @@ class chunked_parquet_reader_options_builder : public parquet_reader_options_bui chunked_parquet_reader_options build() { return create_options(); } }; +/** + * @brief The chunked parquet reader class to handle options and read tables in chunks. + * + * This class is designed to address the reading issue with parquet files that are very larger such + * that their content columns exceed the size limit in cudf. By reading the file content by chunks + * using this class, each chunk is guaranteed to have column sizes stay within the given limit. + */ +class chunked_parquet_reader { + public: + /** + * @brief Default constructor, this should never be used. + * This is added just to satisfy cython. + */ + chunked_parquet_reader() = default; + + /** + * @brief Constructor with chunked reader options. + * + * @param options The options used to read file + * @param mr Device memory resource to use for device memory allocation + */ + chunked_parquet_reader( + chunked_parquet_reader_options const& options, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + + /** + * @brief Reads a chunk of Parquet dataset into a set of columns. + * + * TODO: move this paragraph into implementation code. + * On the first call, a preprocessing step is called which may be expensive before a table is + * returned. All subsequent calls are essentially just doing incremental column allocation and row + * decoding (using all the data stored from the preprocessing step). After each call, an internal + * `skip_rows` state is updated such that the next call will skip the rows returned by the + * previous call, making sure that the sequence of returned tables are continuous and form + * a complete dataset as reading the entire file at once. + * + * + * The sequence of returned tables, if concatenated by their order, guarantee to form a complete + * dataset as reading the entire given file at once. + * + * An empty table will be returned if all the data in the given file has been read and returned by + * the previous calls. + * + * @return The set of columns along with metadata + */ + + table_with_metadata read(); + + /** + * @brief Check if there is any data of the given file has not yet processed. 
+ * + * @return A boolean value indicating if there is any data left to process + */ + bool has_next(); + + private: + /** + * @brief Perform all necessary preprocessing work for reading the given file. + * + * The preprocessing is performed for the entire file, not just by chunks, which may include: + * - Parsing the schema. + * - Decompressing and processing pages. + * - Any other necessary preprocessing steps. + */ + void preprocess(); + + // The internal instance of the reader class to perform chunked reading. + std::unique_ptr reader; +}; + /** @} */ // end of group /** * @addtogroup io_writers From be782f2c88342deb5573d7a6e7990939046cd484 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 5 Oct 2022 14:17:33 -0700 Subject: [PATCH 012/162] Add empty implementation Signed-off-by: Nghia Truong --- cpp/src/io/functions.cpp | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp index aabaa941daf..8e8c7bbe8ec 100644 --- a/cpp/src/io/functions.cpp +++ b/cpp/src/io/functions.cpp @@ -465,6 +465,41 @@ std::unique_ptr> write_parquet(parquet_writer_options const return writer->close(options.get_column_chunks_file_paths()); } +/** + * @copydoc cudf::io::chunked_parquet_reader::chunked_parquet_reader + */ +chunked_parquet_reader::chunked_parquet_reader(chunked_parquet_reader_options const& options, + rmm::mr::device_memory_resource* mr) +{ + // +} + +/** + * @copydoc cudf::io::chunked_parquet_reader::read + */ +table_with_metadata chunked_parquet_reader::read() +{ + // TODO + return table_with_metadata{}; +} + +/** + * @copydoc cudf::io::chunked_parquet_reader::has_next + */ +bool chunked_parquet_reader::has_next() +{ + // TODO + return true; +} + +/** + * @copydoc cudf::io::chunked_parquet_reader::preprocess + */ +void chunked_parquet_reader::preprocess() +{ + // TODO +} + /** * @copydoc cudf::io::parquet_chunked_writer::parquet_chunked_writer */ From a7175c87b617d7298d3ef7f4423c854f0f85f53c Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 5 Oct 2022 15:21:46 -0700 Subject: [PATCH 013/162] Add a destructor and `close` Signed-off-by: Nghia Truong --- cpp/include/cudf/io/parquet.hpp | 17 ++++++++++++++++- cpp/src/io/functions.cpp | 8 ++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index 176aff92b0d..713ec7a30ce 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -525,6 +525,11 @@ class chunked_parquet_reader { chunked_parquet_reader_options const& options, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** + * @brief Destructor, calling `close()` for the reading file to release resources. + */ + ~chunked_parquet_reader() { close(); } + /** * @brief Reads a chunk of Parquet dataset into a set of columns. * @@ -541,7 +546,7 @@ class chunked_parquet_reader { * dataset as reading the entire given file at once. * * An empty table will be returned if all the data in the given file has been read and returned by - * the previous calls. + * the previous calls, or the `close()` function has been called. * * @return The set of columns along with metadata */ @@ -551,10 +556,20 @@ class chunked_parquet_reader { /** * @brief Check if there is any data of the given file has not yet processed. * + * If the file has been closed (i.e., the `close()` function has been called), this will always + * return `false`. 
+ * * @return A boolean value indicating if there is any data left to process */ bool has_next(); + /** + * @brief Close the reading file to release internal resources. + * + * This should not have any effect if called upon an already closed file. + */ + void close(); + private: /** * @brief Perform all necessary preprocessing work for reading the given file. diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp index 8e8c7bbe8ec..f9cda8aa87c 100644 --- a/cpp/src/io/functions.cpp +++ b/cpp/src/io/functions.cpp @@ -500,6 +500,14 @@ void chunked_parquet_reader::preprocess() // TODO } +/** + * @copydoc cudf::io::chunked_parquet_reader::close + */ +void chunked_parquet_reader::close() +{ + // TODO +} + /** * @copydoc cudf::io::parquet_chunked_writer::parquet_chunked_writer */ From 63a7bd604b691ba26db52c2c469e7fb8a0d4e670 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 6 Oct 2022 08:43:58 -0700 Subject: [PATCH 014/162] Update docs Signed-off-by: Nghia Truong --- cpp/include/cudf/io/parquet.hpp | 9 --------- cpp/src/io/functions.cpp | 7 ++++++- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index 713ec7a30ce..9e612423081 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -533,15 +533,6 @@ class chunked_parquet_reader { /** * @brief Reads a chunk of Parquet dataset into a set of columns. * - * TODO: move this paragraph into implementation code. - * On the first call, a preprocessing step is called which may be expensive before a table is - * returned. All subsequent calls are essentially just doing incremental column allocation and row - * decoding (using all the data stored from the preprocessing step). After each call, an internal - * `skip_rows` state is updated such that the next call will skip the rows returned by the - * previous call, making sure that the sequence of returned tables are continuous and form - * a complete dataset as reading the entire file at once. - * - * * The sequence of returned tables, if concatenated by their order, guarantee to form a complete * dataset as reading the entire given file at once. * diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp index f9cda8aa87c..c9963d7e50d 100644 --- a/cpp/src/io/functions.cpp +++ b/cpp/src/io/functions.cpp @@ -479,7 +479,12 @@ chunked_parquet_reader::chunked_parquet_reader(chunked_parquet_reader_options co */ table_with_metadata chunked_parquet_reader::read() { - // TODO + // On the first call, a preprocessing step is called which may be expensive before a table is + // returned. All subsequent calls are essentially just doing incremental column allocation and row + // decoding (using all the data stored from the preprocessing step). + // After each call, an internal `skip_rows` state is updated such that the next call will skip the + // rows returned by the previous call, making sure that the sequence of returned tables are + // continuous and form a complete dataset as reading the entire file at once. 
return table_with_metadata{}; } From 16c12d93d8ec47ea053f44a5019e5107d1396ce1 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 6 Oct 2022 11:00:48 -0700 Subject: [PATCH 015/162] Fix comment Signed-off-by: Nghia Truong --- cpp/include/cudf/io/parquet.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index 9e612423081..bf9385f83f8 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -557,7 +557,7 @@ class chunked_parquet_reader { /** * @brief Close the reading file to release internal resources. * - * This should not have any effect if called upon an already closed file. + * This should not have any effect if being called on an already closed file. */ void close(); From cd8538588fbe51abe734f52a52afa157d1e135f2 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 6 Oct 2022 11:05:21 -0700 Subject: [PATCH 016/162] Construct `chunked_parquet_reader` Signed-off-by: Nghia Truong --- cpp/src/io/functions.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp index c9963d7e50d..90070feffcf 100644 --- a/cpp/src/io/functions.cpp +++ b/cpp/src/io/functions.cpp @@ -470,8 +470,12 @@ std::unique_ptr> write_parquet(parquet_writer_options const */ chunked_parquet_reader::chunked_parquet_reader(chunked_parquet_reader_options const& options, rmm::mr::device_memory_resource* mr) + : reader{ + std::make_unique(make_datasources(options.get_source()), + dynamic_cast(options), + cudf::default_stream_value, + mr)} { - // } /** From 5944beb2d603f7b4c4135d43524920039c8360f0 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 6 Oct 2022 11:07:53 -0700 Subject: [PATCH 017/162] Add comment Signed-off-by: Nghia Truong --- cpp/include/cudf/io/parquet.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index bf9385f83f8..7f29c07bc44 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -573,6 +573,7 @@ class chunked_parquet_reader { void preprocess(); // The internal instance of the reader class to perform chunked reading. + // TODO: Replace this class with a reader class that has interface supporting chunked reading std::unique_ptr reader; }; From 7cfa72a129387a359dac977691f6c3a3c3d93a29 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 7 Oct 2022 08:31:08 -0700 Subject: [PATCH 018/162] Rename function and implementing Signed-off-by: Nghia Truong --- cpp/include/cudf/io/parquet.hpp | 13 ++++++++++--- cpp/src/io/functions.cpp | 15 +++++---------- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index 7f29c07bc44..0a71dca4c8b 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -531,7 +531,7 @@ class chunked_parquet_reader { ~chunked_parquet_reader() { close(); } /** - * @brief Reads a chunk of Parquet dataset into a set of columns. + * @brief Read a chunk of Parquet dataset into a set of columns. * * The sequence of returned tables, if concatenated by their order, guarantee to form a complete * dataset as reading the entire given file at once. @@ -542,7 +542,7 @@ class chunked_parquet_reader { * @return The set of columns along with metadata */ - table_with_metadata read(); + table_with_metadata read_chunk(); /** * @brief Check if there is any data of the given file has not yet processed. 
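For illustration only (not part of the patch itself): a minimal caller-side sketch of how the `read_chunk()`/`has_next()` interface declared above is meant to be driven. The helper name `read_in_chunks` and the final concatenation step are hypothetical; it assumes a `chunked_parquet_reader_options` object has already been built with the builder shown earlier.

#include <cudf/concatenate.hpp>
#include <cudf/io/parquet.hpp>
#include <cudf/table/table.hpp>

#include <memory>
#include <vector>

// Hypothetical helper: drain a file through the chunked interface and stitch the
// pieces back together. Concatenating the chunks in order should reproduce the
// table a single, non-chunked read_parquet() call would have returned.
std::unique_ptr<cudf::table> read_in_chunks(cudf::io::chunked_parquet_reader_options const& opts)
{
  cudf::io::chunked_parquet_reader reader(opts);

  std::vector<std::unique_ptr<cudf::table>> chunks;
  std::vector<cudf::table_view> views;
  while (reader.has_next()) {
    // the first call performs the (potentially expensive) whole-file preprocessing;
    // subsequent calls only allocate and decode the next contiguous slice of rows
    chunks.push_back(std::move(reader.read_chunk().tbl));
    views.push_back(chunks.back()->view());
  }
  return cudf::concatenate(views);
}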
@@ -552,7 +552,11 @@ class chunked_parquet_reader { * * @return A boolean value indicating if there is any data left to process */ - bool has_next(); + bool has_next() + { + // TODO: handle closed file + return skip_rows >= total_rows; + } /** * @brief Close the reading file to release internal resources. @@ -575,6 +579,9 @@ class chunked_parquet_reader { // The internal instance of the reader class to perform chunked reading. // TODO: Replace this class with a reader class that has interface supporting chunked reading std::unique_ptr reader; + + size_type skip_rows{0}; + size_type total_rows{0}; }; /** @} */ // end of group diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp index 90070feffcf..617a477eefc 100644 --- a/cpp/src/io/functions.cpp +++ b/cpp/src/io/functions.cpp @@ -481,7 +481,7 @@ chunked_parquet_reader::chunked_parquet_reader(chunked_parquet_reader_options co /** * @copydoc cudf::io::chunked_parquet_reader::read */ -table_with_metadata chunked_parquet_reader::read() +table_with_metadata chunked_parquet_reader::read_chunk() { // On the first call, a preprocessing step is called which may be expensive before a table is // returned. All subsequent calls are essentially just doing incremental column allocation and row @@ -489,16 +489,11 @@ table_with_metadata chunked_parquet_reader::read() // After each call, an internal `skip_rows` state is updated such that the next call will skip the // rows returned by the previous call, making sure that the sequence of returned tables are // continuous and form a complete dataset as reading the entire file at once. - return table_with_metadata{}; -} -/** - * @copydoc cudf::io::chunked_parquet_reader::has_next - */ -bool chunked_parquet_reader::has_next() -{ - // TODO - return true; + auto output = reader->read(parquet_reader_options{}); + skip_rows += output.tbl->num_rows(); + + return output; } /** From 4696bd392771483dfe57f1ab157344cd9c190c0e Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 7 Oct 2022 15:59:48 -0700 Subject: [PATCH 019/162] MISC Signed-off-by: Nghia Truong --- cpp/include/cudf/io/parquet.hpp | 3 ++- cpp/src/io/functions.cpp | 13 +++++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index 0a71dca4c8b..cf836edf4a8 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -554,7 +554,8 @@ class chunked_parquet_reader { */ bool has_next() { - // TODO: handle closed file + // TODO: + // if(reader->is_close()) { return false; } return skip_rows >= total_rows; } diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp index 617a477eefc..5925b822414 100644 --- a/cpp/src/io/functions.cpp +++ b/cpp/src/io/functions.cpp @@ -479,17 +479,18 @@ chunked_parquet_reader::chunked_parquet_reader(chunked_parquet_reader_options co } /** - * @copydoc cudf::io::chunked_parquet_reader::read + * @copydoc cudf::io::chunked_parquet_reader::read_chunk */ table_with_metadata chunked_parquet_reader::read_chunk() { // On the first call, a preprocessing step is called which may be expensive before a table is // returned. All subsequent calls are essentially just doing incremental column allocation and row // decoding (using all the data stored from the preprocessing step). 
- // After each call, an internal `skip_rows` state is updated such that the next call will skip the - // rows returned by the previous call, making sure that the sequence of returned tables are - // continuous and form a complete dataset as reading the entire file at once. + preprocess(); + // In each call to this function, the internal `skip_rows` state is updated such that the next + // call will skip the rows returned by the previous call, making sure that the sequence of + // returned tables are continuous and form a complete dataset as reading the entire file at once. auto output = reader->read(parquet_reader_options{}); skip_rows += output.tbl->num_rows(); @@ -502,6 +503,8 @@ table_with_metadata chunked_parquet_reader::read_chunk() void chunked_parquet_reader::preprocess() { // TODO + // This step should be a no-op after if it is called from the second time. + // reader->preprocess(); } /** @@ -510,6 +513,8 @@ void chunked_parquet_reader::preprocess() void chunked_parquet_reader::close() { // TODO + // This step should be a no-op if it was called before. + // reader->close(); } /** From 99dc7865ced27f412a18d2b8c9cbfc20fcd1593b Mon Sep 17 00:00:00 2001 From: Dave Baranec Date: Mon, 10 Oct 2022 17:06:28 -0500 Subject: [PATCH 020/162] Bare bones implementation. Many types still not working. --- cpp/CMakeLists.txt | 1 + cpp/src/io/parquet/page_data.cu | 239 ++++----- cpp/src/io/parquet/parquet_gpu.hpp | 25 +- cpp/src/io/parquet/reader_impl.cu | 63 ++- cpp/src/io/parquet/reader_impl.hpp | 19 +- cpp/src/io/parquet/reader_preprocess.cu | 618 ++++++++++++++++++++++++ 6 files changed, 806 insertions(+), 159 deletions(-) create mode 100644 cpp/src/io/parquet/reader_preprocess.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 60e914f07d3..c3b9403b117 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -350,6 +350,7 @@ add_library( src/io/parquet/page_enc.cu src/io/parquet/page_hdr.cu src/io/parquet/reader_impl.cu + src/io/parquet/reader_preprocess.cu src/io/parquet/writer_impl.cu src/io/statistics/orc_column_statistics.cu src/io/statistics/parquet_column_statistics.cu diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index a5f6d737637..9ff3818b6e9 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -18,15 +18,19 @@ #include #include +#include #include #include +#include #include #include #include #include +#include #include +#include #include #include #include @@ -1480,12 +1484,22 @@ __global__ void __launch_bounds__(block_size) int t = threadIdx.x; PageInfo* pp = &pages[page_idx]; + if (!setupLocalPageInfo(s, pp, chunks, trim_pass ? min_row : 0, trim_pass ? num_rows : INT_MAX)) { + return; + } + // we only need to preprocess hierarchies with repetition in them (ie, hierarchies // containing lists anywhere within). bool const has_repetition = chunks[pp->chunk_idx].max_level[level_type::REPETITION] > 0; - if (!has_repetition) { return; } - if (!setupLocalPageInfo(s, pp, chunks, trim_pass ? min_row : 0, trim_pass ? num_rows : INT_MAX)) { + // if this is a flat data type, compute the size directly from the number of values. + // NOTE: does not work for strings! 
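+  // without repetition levels, each input value corresponds to exactly one row at
+  // every nesting depth, so each level's size is simply the page's value count.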
+ if (!has_repetition) { + if (!t) { + for (size_type idx = 0; idx < pp->num_nesting_levels; idx++) { + pp->nesting[idx].size = pp->num_input_values; + } + } return; } @@ -1737,149 +1751,108 @@ struct start_offset_output_iterator { } }; -/** - * @copydoc cudf::io::parquet::gpu::PreprocessColumnData - */ -void PreprocessColumnData(hostdevice_vector& pages, - hostdevice_vector const& chunks, - std::vector& input_columns, - std::vector& output_columns, - size_t num_rows, - size_t min_row, - bool uses_custom_row_bounds, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - dim3 dim_block(block_size, 1); - dim3 dim_grid(pages.size(), 1); // 1 threadblock per page - - // computes: - // PageNestingInfo::size for each level of nesting, for each page. - // This computes the size for the entire page, not taking row bounds into account. - // If uses_custom_row_bounds is set to true, we have to do a second pass later that "trims" - // the starting and ending read values to account for these bounds. - gpuComputePageSizes<<>>( - pages.device_ptr(), - chunks, - // if uses_custom_row_bounds is false, include all possible rows. - uses_custom_row_bounds ? min_row : 0, - uses_custom_row_bounds ? num_rows : INT_MAX, - !uses_custom_row_bounds); - - // computes: - // PageInfo::chunk_row for all pages - // Note: this is doing some redundant work for pages in flat hierarchies. chunk_row has already - // been computed during header decoding. the overall amount of work here is very small though. - auto key_input = thrust::make_transform_iterator( - pages.device_ptr(), [] __device__(PageInfo const& page) { return page.chunk_idx; }); - auto page_input = thrust::make_transform_iterator( - pages.device_ptr(), [] __device__(PageInfo const& page) { return page.num_rows; }); - thrust::exclusive_scan_by_key(rmm::exec_policy(stream), - key_input, - key_input + pages.size(), - page_input, - chunk_row_output_iter{pages.device_ptr()}); - - // computes: - // PageNestingInfo::size for each level of nesting, for each page, taking row bounds into account. - // PageInfo::skipped_values, which tells us where to start decoding in the input . - // It is only necessary to do this second pass if uses_custom_row_bounds is set (if the user has - // specified artifical bounds). - if (uses_custom_row_bounds) { - gpuComputePageSizes<<>>( - pages.device_ptr(), chunks, min_row, num_rows, true); +struct cumulative_row_info { + size_t row_count; // cumulative row count + size_t size_bytes; // cumulative size in bytes + int key; // schema index +}; +struct cumulative_row_sum { + cumulative_row_info operator() + __device__(cumulative_row_info const& a, cumulative_row_info const& b) const + { + return cumulative_row_info{a.row_count + b.row_count, a.size_bytes + b.size_bytes, a.key}; } +}; - // ordering of pages is by input column schema, repeated across row groups. so - // if we had 3 columns, each with 2 pages, and 1 row group, our schema values might look like - // - // 1, 1, 2, 2, 3, 3 - // - // However, if we had more than one row group, the pattern would be - // - // 1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3 - // ^ row group 0 | - // ^ row group 1 - // - // To use exclusive_scan_by_key, the ordering we actually want is - // - // 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3 - // - // We also need to preserve key-relative page ordering, so we need to use a stable sort. 
- rmm::device_uvector page_keys(pages.size(), stream); - rmm::device_uvector page_index(pages.size(), stream); +struct row_size_functor { + template + __device__ size_t operator()(size_t num_rows, bool nullable) { - thrust::transform(rmm::exec_policy(stream), - pages.device_ptr(), - pages.device_ptr() + pages.size(), - page_keys.begin(), - [] __device__(PageInfo const& page) { return page.src_col_schema; }); - - thrust::sequence(rmm::exec_policy(stream), page_index.begin(), page_index.end()); - thrust::stable_sort_by_key(rmm::exec_policy(stream), - page_keys.begin(), - page_keys.end(), - page_index.begin(), - thrust::less()); + auto const element_size = sizeof(device_storage_type_t); + return (element_size * num_rows) + + (nullable ? (cudf::util::div_rounding_up_safe(num_rows, size_t{32}) / 8) : 0); } +}; - // compute output column sizes by examining the pages of the -input- columns - for (size_t idx = 0; idx < input_columns.size(); idx++) { - auto const& input_col = input_columns[idx]; - auto src_col_schema = input_col.schema_idx; - size_t max_depth = input_col.nesting_depth(); - - auto* cols = &output_columns; - for (size_t l_idx = 0; l_idx < input_col.nesting_depth(); l_idx++) { - auto& out_buf = (*cols)[input_col.nesting[l_idx]]; - cols = &out_buf.children; - - // size iterator. indexes pages by sorted order - auto size_input = thrust::make_transform_iterator( - page_index.begin(), - [src_col_schema, l_idx, pages = pages.device_ptr()] __device__(int index) { - auto const& page = pages[index]; - if (page.src_col_schema != src_col_schema || page.flags & PAGEINFO_FLAGS_DICTIONARY) { - return 0; - } - return page.nesting[l_idx].size; - }); +template <> +__device__ size_t row_size_functor::operator()(size_t num_rows, bool nullable) +{ + auto const offset_size = sizeof(offset_type); + return (offset_size * (num_rows + 1)) + + (nullable ? (cudf::util::div_rounding_up_safe(num_rows, size_t{32}) / 8) : 0); +} - // if this buffer is part of a list hierarchy, we need to determine it's - // final size and allocate it here. - // - // for struct columns, higher levels of the output columns are shared between input - // columns. so don't compute any given level more than once. - if ((out_buf.user_data & PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT) && out_buf.size == 0) { - int size = thrust::reduce(rmm::exec_policy(stream), size_input, size_input + pages.size()); +template <> +__device__ size_t row_size_functor::operator()(size_t num_rows, bool nullable) +{ + return nullable ? (cudf::util::div_rounding_up_safe(num_rows, size_t{32}) / 8) : 0; +} - // if this is a list column add 1 for non-leaf levels for the terminating offset - if (out_buf.type.id() == type_id::LIST && l_idx < max_depth) { size++; } +template <> +__device__ size_t row_size_functor::operator()(size_t num_rows, bool nullable) +{ + // CUDF_FAIL("String types currently unsupported"); + return 0; +} - // allocate - out_buf.create(size, stream, mr); - } +struct get_cumulative_row_info { + PageInfo const* const pages; - // for nested hierarchies, compute per-page start offset. - // it would be better/safer to be checking (schema.max_repetition_level > 0) here, but there's - // no easy way to get at that info here. 
we'd have to move this function into reader_impl.cu - if ((out_buf.user_data & PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT) || - out_buf.type.id() == type_id::LIST) { - thrust::exclusive_scan_by_key(rmm::exec_policy(stream), - page_keys.begin(), - page_keys.end(), - size_input, - start_offset_output_iterator{pages.device_ptr(), - page_index.begin(), - 0, - static_cast(src_col_schema), - static_cast(l_idx)}); - } + cumulative_row_info operator() __device__(size_type index) + { + auto const& page = pages[index]; + if (page.flags & PAGEINFO_FLAGS_DICTIONARY) { + return cumulative_row_info{0, 0, page.src_col_schema}; } + size_t const row_count = page.nesting[0].size; + return cumulative_row_info{ + row_count, + cudf::type_dispatcher(data_type{page.nesting[0].type}, row_size_functor{}, row_count, false), + page.src_col_schema}; } +}; + +struct row_total_size { + cumulative_row_info const* const c_info; + size_type const* const key_offsets; + size_t const num_keys; - // retrieve pages back - pages.device_to_host(stream); + __device__ cumulative_row_info operator()(cumulative_row_info const& i) + { + // sum sizes for each input column at this row + size_t sum = 0; + for (int idx = 0; idx < num_keys; idx++) { + auto const start = key_offsets[idx]; + auto const end = key_offsets[idx + 1]; + auto iter = cudf::detail::make_counting_transform_iterator( + 0, [&] __device__(size_type i) { return c_info[start + i].row_count; }); + auto const page_index = + (thrust::lower_bound(thrust::seq, iter, iter + (end - start), i.row_count) - iter) + start; + // printf("KI(%d): start(%d), end(%d), page_index(%d), size_bytes(%lu)\n", idx, start, end, + // (int)page_index, c_info[page_index].size_bytes); + sum += c_info[page_index].size_bytes; + } + return {i.row_count, sum}; + } +}; + +void ComputePageSizes(hostdevice_vector& pages, + hostdevice_vector const& chunks, + size_t min_row, + size_t num_rows, + bool trim_pass, + rmm::cuda_stream_view stream) +{ + dim3 dim_block(block_size, 1); + dim3 dim_grid(pages.size(), 1); // 1 threadblock per page + + // computes: + // PageNestingInfo::size for each level of nesting, for each page. + // This computes the size for the entire page, not taking row bounds into account. + // If uses_custom_row_bounds is set to true, we have to do a second pass later that "trims" + // the starting and ending read values to account for these bounds. 
+ gpuComputePageSizes<<>>( + pages.device_ptr(), chunks, min_row, num_rows, trim_pass); } /** diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index 8f4cd5c6f3b..a79ef046bb5 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -99,6 +99,7 @@ struct PageNestingInfo { // set at initialization int32_t max_def_level; int32_t max_rep_level; + cudf::type_id type; // set during preprocessing int32_t size; // this page/nesting-level's size contribution to the output column @@ -235,6 +236,20 @@ struct ColumnChunkDesc { int32_t src_col_schema; // my schema index in the file }; +struct chunked_intermediate_data { + rmm::device_uvector page_keys; + rmm::device_uvector page_index; + chunked_intermediate_data() + : page_keys(0, rmm::cuda_stream_default), page_index(0, rmm::cuda_stream_default) + { + } +}; + +struct chunked_read_info { + size_t skip_rows; + size_t num_rows; +}; + /** * @brief Struct describing an encoder column */ @@ -407,6 +422,13 @@ void BuildStringDictionaryIndex(ColumnChunkDesc* chunks, int32_t num_chunks, rmm::cuda_stream_view stream); +void ComputePageSizes(hostdevice_vector& pages, + hostdevice_vector const& chunks, + size_t num_rows, + size_t min_row, + bool trim_pass, + rmm::cuda_stream_view stream); + /** * @brief Preprocess column information for nested schemas. * @@ -428,6 +450,7 @@ void BuildStringDictionaryIndex(ColumnChunkDesc* chunks, * bounds * @param stream Cuda stream */ +/* void PreprocessColumnData(hostdevice_vector& pages, hostdevice_vector const& chunks, std::vector& input_columns, @@ -436,7 +459,7 @@ void PreprocessColumnData(hostdevice_vector& pages, size_t min_row, bool uses_custom_row_bounds, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::mr::device_memory_resource* mr);*/ /** * @brief Launches kernel for reading the column data stored in the pages diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 07869189089..4e48aa6d634 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1328,6 +1328,8 @@ void reader::impl::allocate_nesting_info(hostdevice_vector pni[cur_depth].max_def_level = cur_schema.max_definition_level; pni[cur_depth].max_rep_level = cur_schema.max_repetition_level; pni[cur_depth].size = 0; + pni[cur_depth].type = + to_type_id(cur_schema, _strings_to_categorical, _timestamp_type.id()); } // move up the hierarchy @@ -1349,22 +1351,24 @@ void reader::impl::allocate_nesting_info(hostdevice_vector /** * @copydoc cudf::io::detail::parquet::preprocess_columns */ +/* void reader::impl::preprocess_columns(hostdevice_vector& chunks, hostdevice_vector& pages, size_t min_row, size_t total_rows, - bool uses_custom_row_bounds) + bool uses_custom_row_bounds, + size_type chunked_read_size) { // iterate over all input columns and allocate any associated output // buffers if they are not part of a list hierarchy. mark down // if we have any list columns that need further processing. 
bool has_lists = false; for (size_t idx = 0; idx < _input_columns.size(); idx++) { - auto const& input_col = _input_columns[idx]; - size_t const max_depth = input_col.nesting_depth(); + auto const& input_col = _input_columns[idx]; + size_t max_depth = input_col.nesting_depth(); auto* cols = &_output_columns; - for (size_t l_idx = 0; l_idx < max_depth; l_idx++) { + for (size_t l_idx = 0; l_idx < input_col.nesting_depth(); l_idx++) { auto& out_buf = (*cols)[input_col.nesting[l_idx]]; cols = &out_buf.children; @@ -1384,8 +1388,9 @@ void reader::impl::preprocess_columns(hostdevice_vector& c } } - // if we have columns containing lists, further preprocessing is necessary. - if (has_lists) { + // if we have columns containing lists, or if we're doing chunked reads, + // further preprocessing is necessary. + if (has_lists || chunked_read_size > 0) { gpu::PreprocessColumnData(pages, chunks, _input_columns, @@ -1393,11 +1398,13 @@ void reader::impl::preprocess_columns(hostdevice_vector& c total_rows, min_row, uses_custom_row_bounds, + chunked_read_size, _stream, _mr); _stream.synchronize(); } } +*/ /** * @copydoc cudf::io::detail::parquet::decode_page_data @@ -1762,20 +1769,36 @@ table_with_metadata reader::impl::read(size_type skip_rows, // // - for nested schemas, output buffer offset values per-page, per nesting-level for the // purposes of decoding. - preprocess_columns(chunks, pages, skip_rows, num_rows, uses_custom_row_bounds); - - // decoding of column data itself - decode_page_data(chunks, pages, page_nesting_info, skip_rows, num_rows); - - // create the final output cudf columns - for (size_t i = 0; i < _output_columns.size(); ++i) { - column_name_info& col_name = out_metadata.schema_info.emplace_back(""); - auto const metadata = - _reader_column_schema.has_value() - ? std::make_optional((*_reader_column_schema)[i]) - : std::nullopt; - out_columns.emplace_back( - make_column(_output_columns[i], &col_name, metadata, _stream, _mr)); + // TODO: make this a parameter. + // auto const chunked_read_size = 240000; + auto const chunked_read_size = 0; + auto chunk_reads = preprocess_columns( + chunks, pages, skip_rows, num_rows, uses_custom_row_bounds, chunked_read_size); + + // process each chunk. this is the part that would be externalized into multiple calls + auto read_info = chunk_reads.second[0]; + { + // allocate outgoing columns + allocate_columns(chunks, + pages, + chunk_reads.first, + read_info.skip_rows, + read_info.num_rows, + uses_custom_row_bounds); + + // decoding column data + decode_page_data(chunks, pages, page_nesting_info, read_info.skip_rows, read_info.num_rows); + + // create the final output cudf columns + for (size_t i = 0; i < _output_columns.size(); ++i) { + column_name_info& col_name = out_metadata.schema_info.emplace_back(""); + auto const metadata = + _reader_column_schema.has_value() + ? std::make_optional((*_reader_column_schema)[i]) + : std::nullopt; + out_columns.emplace_back( + make_column(_output_columns[i], &col_name, metadata, _stream, _mr)); + } } } } diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index 6c3e05b4264..04827c16994 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -165,11 +165,20 @@ class reader::impl { * bounds * a preprocess. 
*/ - void preprocess_columns(hostdevice_vector& chunks, - hostdevice_vector& pages, - size_t min_row, - size_t total_rows, - bool uses_custom_row_bounds); + std::pair> preprocess_columns( + hostdevice_vector& chunks, + hostdevice_vector& pages, + size_t min_row, + size_t total_rows, + bool uses_custom_row_bounds, + size_type chunked_read_size); + + void allocate_columns(hostdevice_vector& chunks, + hostdevice_vector& pages, + gpu::chunked_intermediate_data const& id, + size_t min_row, + size_t total_rows, + bool uses_custom_row_bounds); /** * @brief Converts the page data and outputs to columns. diff --git a/cpp/src/io/parquet/reader_preprocess.cu b/cpp/src/io/parquet/reader_preprocess.cu new file mode 100644 index 00000000000..d316fee9f07 --- /dev/null +++ b/cpp/src/io/parquet/reader_preprocess.cu @@ -0,0 +1,618 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "reader_impl.hpp" + +#include +#include + +#include +#include + +#include +#include +#include + +namespace cudf { +namespace io { +namespace detail { +namespace parquet { + +// Import functionality that's independent of legacy code +using namespace cudf::io::parquet; +using namespace cudf::io; + +namespace { + +struct cumulative_row_info { + size_t row_count; // cumulative row count + size_t size_bytes; // cumulative size in bytes + int key; // schema index +}; +struct cumulative_row_sum { + cumulative_row_info operator() + __device__(cumulative_row_info const& a, cumulative_row_info const& b) const + { + return cumulative_row_info{a.row_count + b.row_count, a.size_bytes + b.size_bytes, a.key}; + } +}; + +struct row_size_functor { + template + __device__ size_t operator()(size_t num_rows, bool nullable) + { + auto const element_size = sizeof(device_storage_type_t); + return (element_size * num_rows) + + (nullable ? (cudf::util::div_rounding_up_safe(num_rows, size_t{32}) / 8) : 0); + } +}; + +template <> +__device__ size_t row_size_functor::operator()(size_t num_rows, bool nullable) +{ + auto const offset_size = sizeof(offset_type); + return (offset_size * (num_rows + 1)) + + (nullable ? (cudf::util::div_rounding_up_safe(num_rows, size_t{32}) / 8) : 0); +} + +template <> +__device__ size_t row_size_functor::operator()(size_t num_rows, bool nullable) +{ + return nullable ? 
(cudf::util::div_rounding_up_safe(num_rows, size_t{32}) / 8) : 0; +} + +template <> +__device__ size_t row_size_functor::operator()(size_t num_rows, bool nullable) +{ + // CUDF_FAIL("String types currently unsupported"); + return 0; +} + +struct get_cumulative_row_info { + gpu::PageInfo const* const pages; + + cumulative_row_info operator() __device__(size_type index) + { + auto const& page = pages[index]; + if (page.flags & gpu::PAGEINFO_FLAGS_DICTIONARY) { + return cumulative_row_info{0, 0, page.src_col_schema}; + } + size_t const row_count = page.nesting[0].size; + return cumulative_row_info{ + row_count, + cudf::type_dispatcher(data_type{page.nesting[0].type}, row_size_functor{}, row_count, false), + page.src_col_schema}; + } +}; + +struct row_total_size { + cumulative_row_info const* const c_info; + size_type const* const key_offsets; + size_t const num_keys; + + __device__ cumulative_row_info operator()(cumulative_row_info const& i) + { + // sum sizes for each input column at this row + size_t sum = 0; + for (int idx = 0; idx < num_keys; idx++) { + auto const start = key_offsets[idx]; + auto const end = key_offsets[idx + 1]; + auto iter = cudf::detail::make_counting_transform_iterator( + 0, [&] __device__(size_type i) { return c_info[start + i].row_count; }); + auto const page_index = + (thrust::lower_bound(thrust::seq, iter, iter + (end - start), i.row_count) - iter) + start; + // printf("KI(%d): start(%d), end(%d), page_index(%d), size_bytes(%lu)\n", idx, start, end, + // (int)page_index, c_info[page_index].size_bytes); + sum += c_info[page_index].size_bytes; + } + return {i.row_count, sum}; + } +}; + +std::vector compute_splits(hostdevice_vector& pages, + gpu::chunked_intermediate_data const& id, + size_type num_rows, + size_type chunked_read_size, + rmm::cuda_stream_view stream) +{ + auto const& page_keys = id.page_keys; + auto const& page_index = id.page_index; + + // generate cumulative row counts and sizes + rmm::device_uvector c_info(page_keys.size(), stream); + // convert PageInfo to cumulative_row_info + auto page_input = thrust::make_transform_iterator(page_index.begin(), + get_cumulative_row_info{pages.device_ptr()}); + thrust::inclusive_scan_by_key(rmm::exec_policy(stream), + page_keys.begin(), + page_keys.end(), + page_input, + c_info.begin(), + thrust::equal_to{}, + cumulative_row_sum{}); + /* + stream.synchronize(); + pages.device_to_host(stream); + std::vector h_page_index(pages.size()); + cudaMemcpyAsync(h_page_index.data(), page_index.begin(), sizeof(int) * pages.size(), + cudaMemcpyDeviceToHost, stream); stream.synchronize(); for(size_t idx=0; idx h_c_info(page_keys.size()); + cudaMemcpy(h_c_info.data(), c_info.data(), sizeof(cumulative_row_info) * page_keys.size(), + cudaMemcpyDeviceToHost); for(size_t idx=0; idx c_info_sorted{c_info, stream}; + thrust::sort(rmm::exec_policy(stream), + c_info_sorted.begin(), + c_info_sorted.end(), + [] __device__(cumulative_row_info const& a, cumulative_row_info const& b) { + return a.row_count < b.row_count; + }); + + // generate key offsets (offsets to the start of each partition of keys). 
worst case is 1 page per + // key + rmm::device_uvector key_offsets(page_keys.size() + 1, stream); + auto [_, key_offsets_end] = thrust::reduce_by_key(rmm::exec_policy(stream), + page_keys.begin(), + page_keys.end(), + thrust::make_constant_iterator(1), + thrust::make_discard_iterator(), + key_offsets.begin()); + size_t const num_unique_keys = key_offsets_end - key_offsets.begin(); + /* + stream.synchronize(); + printf("Num keys: %d\n", (int)num_unique_keys); + std::vector h_key_offsets(num_unique_keys); + cudaMemcpy(h_key_offsets.data(), key_offsets.data(), sizeof(size_type) * num_unique_keys, + cudaMemcpyDeviceToHost); for(size_t idx=0; idx 100 <----> 200 + // Column B: 0 <---------------> 200 <--------> 400 + // | + // if we decide to split at row 100, we don't really know the actual amount of bytes in column B + // at that point. So we have to proceed as if we are taking the bytes from all 200 rows of that + // page. + // + rmm::device_uvector adjusted(c_info.size(), stream); + thrust::transform(rmm::exec_policy(stream), + c_info_sorted.begin(), + c_info_sorted.end(), + adjusted.begin(), + row_total_size{c_info.data(), key_offsets.data(), num_unique_keys}); + + // bring back to the cpu + std::vector h_adjusted(adjusted.size()); + cudaMemcpyAsync(h_adjusted.data(), + adjusted.data(), + sizeof(cumulative_row_info) * c_info.size(), + cudaMemcpyDeviceToHost, + stream); + stream.synchronize(); + /* + for(size_t idx=0; idx splits; + { + size_t cur_pos = 0; + size_t cumulative_size = 0; + size_t cur_row_count = 0; + while (cur_row_count < static_cast(num_rows)) { + auto iter = thrust::make_transform_iterator( + h_adjusted.begin() + cur_pos, + [cumulative_size](cumulative_row_info const& i) { return i.size_bytes - cumulative_size; }); + size_type p = + (thrust::lower_bound( + thrust::seq, iter, iter + h_adjusted.size(), static_cast(chunked_read_size)) - + iter) + + cur_pos; + if (h_adjusted[p].size_bytes - cumulative_size > static_cast(chunked_read_size) || + static_cast(p) == h_adjusted.size()) { + p--; + } + if (h_adjusted[p].row_count == cur_row_count || p < 0) { + CUDF_FAIL("Cannot find read split boundary small enough"); + } + + auto const start_row = cur_row_count; + cur_row_count = h_adjusted[p].row_count; + splits.push_back(gpu::chunked_read_info{start_row, cur_row_count - start_row}); + cur_pos = p; + cumulative_size = h_adjusted[p].size_bytes; + } + } + + return splits; +} + +struct get_page_chunk_idx { + __device__ size_type operator()(gpu::PageInfo const& page) { return page.chunk_idx; } +}; + +struct get_page_num_rows { + __device__ size_type operator()(gpu::PageInfo const& page) { return page.num_rows; } +}; + +struct get_page_schema { + __device__ size_type operator()(gpu::PageInfo const& page) { return page.src_col_schema; } +}; + +struct get_page_nesting_size { + size_type const src_col_schema; + size_type const depth; + gpu::PageInfo const* const pages; + + __device__ size_type operator()(int index) + { + auto const& page = pages[index]; + if (page.src_col_schema != src_col_schema || page.flags & gpu::PAGEINFO_FLAGS_DICTIONARY) { + return 0; + } + return page.nesting[depth].size; + } +}; + +struct chunk_row_output_iter { + gpu::PageInfo* p; + using value_type = size_type; + using difference_type = size_type; + using pointer = size_type*; + using reference = size_type&; + using iterator_category = thrust::output_device_iterator_tag; + + __host__ __device__ chunk_row_output_iter operator+(int i) + { + return chunk_row_output_iter{p + i}; + } + + __host__ __device__ void 
operator++() { p++; } + + __device__ reference operator[](int i) { return p[i].chunk_row; } + __device__ reference operator*() { return p->chunk_row; } + // __device__ void operator=(value_type v) { p->chunk_row = v; } +}; + +struct start_offset_output_iterator { + gpu::PageInfo* pages; + int const* page_indices; + int cur_index; + int src_col_schema; + int nesting_depth; + int empty = 0; + using value_type = size_type; + using difference_type = size_type; + using pointer = size_type*; + using reference = size_type&; + using iterator_category = thrust::output_device_iterator_tag; + + __host__ __device__ void operator=(start_offset_output_iterator const& other) + { + pages = other.pages; + page_indices = other.page_indices; + cur_index = other.cur_index; + src_col_schema = other.src_col_schema; + nesting_depth = other.nesting_depth; + } + + __host__ __device__ start_offset_output_iterator operator+(int i) + { + return start_offset_output_iterator{ + pages, page_indices, cur_index + i, src_col_schema, nesting_depth}; + } + + __host__ __device__ void operator++() { cur_index++; } + + __device__ reference operator[](int i) { return dereference(cur_index + i); } + __device__ reference operator*() { return dereference(cur_index); } + + private: + __device__ reference dereference(int index) + { + gpu::PageInfo const& p = pages[page_indices[index]]; + if (p.src_col_schema != src_col_schema || p.flags & gpu::PAGEINFO_FLAGS_DICTIONARY) { + return empty; + } + return p.nesting[nesting_depth].page_start_value; + } +}; + +} // anonymous namespace + +/** + * @copydoc cudf::io::detail::parquet::preprocess_columns + */ +std::pair> +reader::impl::preprocess_columns(hostdevice_vector& chunks, + hostdevice_vector& pages, + size_t min_row, + size_t num_rows, + bool uses_custom_row_bounds, + size_type chunked_read_size) +{ + // iterate over all input columns and determine if they contain lists so we can further + // preprocess them. + bool has_lists = false; + for (size_t idx = 0; idx < _input_columns.size(); idx++) { + auto const& input_col = _input_columns[idx]; + size_t const max_depth = input_col.nesting_depth(); + + auto* cols = &_output_columns; + for (size_t l_idx = 0; l_idx < max_depth; l_idx++) { + auto& out_buf = (*cols)[input_col.nesting[l_idx]]; + cols = &out_buf.children; + + // if this has a list parent, we will have to do further work in gpu::PreprocessColumnData + // to know how big this buffer actually is. + if (out_buf.user_data & PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT) { + has_lists = true; + break; + } + } + if (has_lists) { break; } + } + + // intermediate data we will need for further chunked reads + gpu::chunked_intermediate_data id; + if (has_lists || chunked_read_size > 0) { + // computes: + // PageNestingInfo::size for each level of nesting, for each page. + // This computes the size for the entire page, not taking row bounds into account. + /* + gpuComputePageSizes<<>>( + pages.device_ptr(), + chunks, + // if uses_custom_row_bounds is false, include all possible rows. + uses_custom_row_bounds ? min_row : 0, + uses_custom_row_bounds ? num_rows : INT_MAX, + !uses_custom_row_bounds); + */ + // we will be applying a later trim pass if skip_rows/num_rows is being used, which can happen + // if: + // - user has passed custom row bounds + // - if we will be doing a chunked read + auto const will_trim_later = uses_custom_row_bounds || chunked_read_size > 0; + gpu::ComputePageSizes(pages, + chunks, + !will_trim_later ? min_row : 0, + !will_trim_later ? 
num_rows : INT_MAX, + !will_trim_later, + _stream); + + // computes: + // PageInfo::chunk_row for all pages + // Note: this is doing some redundant work for pages in flat hierarchies. chunk_row has already + // been computed during header decoding. the overall amount of work here is very small though. + auto key_input = thrust::make_transform_iterator(pages.device_ptr(), get_page_chunk_idx{}); + auto page_input = thrust::make_transform_iterator(pages.device_ptr(), get_page_num_rows{}); + thrust::exclusive_scan_by_key(rmm::exec_policy(_stream), + key_input, + key_input + pages.size(), + page_input, + chunk_row_output_iter{pages.device_ptr()}); + + // compute page ordering. + // + // ordering of pages is by input column schema, repeated across row groups. so + // if we had 3 columns, each with 2 pages, and 1 row group, our schema values might look like + // + // 1, 1, 2, 2, 3, 3 + // + // However, if we had more than one row group, the pattern would be + // + // 1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3 + // ^ row group 0 | + // ^ row group 1 + // + // To use exclusive_scan_by_key, the ordering we actually want is + // + // 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3 + // + // We also need to preserve key-relative page ordering, so we need to use a stable sort. + id.page_keys = rmm::device_uvector(pages.size(), _stream); + id.page_index = rmm::device_uvector(pages.size(), _stream); + auto& page_keys = id.page_keys; + auto& page_index = id.page_index; + { + thrust::transform(rmm::exec_policy(_stream), + pages.device_ptr(), + pages.device_ptr() + pages.size(), + page_keys.begin(), + get_page_schema{}); + + thrust::sequence(rmm::exec_policy(_stream), page_index.begin(), page_index.end()); + thrust::stable_sort_by_key(rmm::exec_policy(_stream), + page_keys.begin(), + page_keys.end(), + page_index.begin(), + thrust::less()); + } + + // retrieve pages back + pages.device_to_host(_stream, true); + } + + // compute splits if necessary. + std::vector read_chunks = + chunked_read_size > 0 ? compute_splits(pages, id, num_rows, chunked_read_size, _stream) + : std::vector{{min_row, num_rows}}; + + return {std::move(id), std::move(read_chunks)}; +} + +void reader::impl::allocate_columns(hostdevice_vector& chunks, + hostdevice_vector& pages, + gpu::chunked_intermediate_data const& id, + size_t min_row, + size_t num_rows, + bool uses_custom_row_bounds) +{ + // computes: + // PageNestingInfo::size for each level of nesting, for each page, taking row bounds into account. + // PageInfo::skipped_values, which tells us where to start decoding in the input. + // It is only necessary to do this second pass if uses_custom_row_bounds is set (if the user has + // specified artifical bounds). + if (uses_custom_row_bounds) { + gpu::ComputePageSizes(pages, chunks, min_row, num_rows, true, _stream); + } + + // iterate over all input columns and allocate any associated output + // buffers if they are not part of a list hierarchy. mark down + // if we have any list columns that need further processing. + bool has_lists = false; + for (size_t idx = 0; idx < _input_columns.size(); idx++) { + auto const& input_col = _input_columns[idx]; + size_t const max_depth = input_col.nesting_depth(); + + auto* cols = &_output_columns; + for (size_t l_idx = 0; l_idx < max_depth; l_idx++) { + auto& out_buf = (*cols)[input_col.nesting[l_idx]]; + cols = &out_buf.children; + + // if this has a list parent, we will have to do further work in gpu::PreprocessColumnData + // to know how big this buffer actually is. 
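+      // (that size comes from the per-page nesting sizes computed during the
+      //  preprocessing pass, reduced per output level further below)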
+ if (out_buf.user_data & PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT) { + has_lists = true; + } + // if we haven't already processed this column because it is part of a struct hierarchy + else if (out_buf.size == 0) { + // add 1 for the offset if this is a list column + out_buf.create( + out_buf.type.id() == type_id::LIST && l_idx < max_depth ? num_rows + 1 : num_rows, + _stream, + _mr); + } + } + } + + // compute output column sizes by examining the pages of the -input- columns + if (has_lists) { + auto& page_keys = id.page_keys; + auto& page_index = id.page_index; + for (size_t idx = 0; idx < _input_columns.size(); idx++) { + auto const& input_col = _input_columns[idx]; + auto src_col_schema = input_col.schema_idx; + size_t max_depth = input_col.nesting_depth(); + + auto* cols = &_output_columns; + for (size_t l_idx = 0; l_idx < input_col.nesting_depth(); l_idx++) { + auto& out_buf = (*cols)[input_col.nesting[l_idx]]; + cols = &out_buf.children; + + // size iterator. indexes pages by sorted order + auto size_input = thrust::make_transform_iterator( + page_index.begin(), + get_page_nesting_size{src_col_schema, static_cast(l_idx), pages.device_ptr()}); + + // if this buffer is part of a list hierarchy, we need to determine it's + // final size and allocate it here. + // + // for struct columns, higher levels of the output columns are shared between input + // columns. so don't compute any given level more than once. + if ((out_buf.user_data & PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT) && out_buf.size == 0) { + int size = + thrust::reduce(rmm::exec_policy(_stream), size_input, size_input + pages.size()); + + // if this is a list column add 1 for non-leaf levels for the terminating offset + if (out_buf.type.id() == type_id::LIST && l_idx < max_depth) { size++; } + + // allocate + out_buf.create(size, _stream, _mr); + } + + // for nested hierarchies, compute per-page start offset. + // it would be better/safer to be checking (schema.max_repetition_level > 0) here, but + // there's no easy way to get at that info here. 
we'd have to move this function into + // reader_impl.cu + if ((out_buf.user_data & PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT) || + out_buf.type.id() == type_id::LIST) { + thrust::exclusive_scan_by_key( + rmm::exec_policy(_stream), + page_keys.begin(), + page_keys.end(), + size_input, + start_offset_output_iterator{pages.device_ptr(), + page_index.begin(), + 0, + static_cast(src_col_schema), + static_cast(l_idx)}); + } + } + } + } +} + +/* +{ + std::mt19937 gen(6542); + std::bernoulli_distribution bn(0.7f); + //auto valids = +// cudf::detail::make_counting_transform_iterator(0, [&](int index) { return bn(gen); }); + auto values = thrust::make_counting_iterator(0); + + constexpr size_type num_rows = 40000; + cudf::test::fixed_width_column_wrapper a(values, values + num_rows); + cudf::test::fixed_width_column_wrapper b(values, values + num_rows); + + cudf::table_view t({a, b}); + cudf::io::parquet_writer_options opts = +cudf::io::parquet_writer_options::builder(cudf::io::sink_info{"parquet/tmp/chunked_splits.parquet"}, +t); cudf::io::write_parquet(opts); + + cudf::io::parquet_reader_options in_opts = +cudf::io::parquet_reader_options::builder(cudf::io::source_info{"parquet/tmp/chunked_splits.parquet"}); + auto result = cudf::io::read_parquet(in_opts); +} +*/ + +} // namespace parquet +} // namespace detail +} // namespace io +} // namespace cudf From 583a7ef37eb964094ca0e6a32b2bd70c056457aa Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 11 Oct 2022 13:06:38 -0700 Subject: [PATCH 021/162] Add test Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.cu | 6 +- cpp/tests/io/parquet_test.cpp | 4727 +---------------------------- 2 files changed, 19 insertions(+), 4714 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 4e48aa6d634..e0962520570 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1770,9 +1770,9 @@ table_with_metadata reader::impl::read(size_type skip_rows, // - for nested schemas, output buffer offset values per-page, per nesting-level for the // purposes of decoding. // TODO: make this a parameter. - // auto const chunked_read_size = 240000; - auto const chunked_read_size = 0; - auto chunk_reads = preprocess_columns( + auto const chunked_read_size = 240000; + // auto const chunked_read_size = 0; + auto chunk_reads = preprocess_columns( chunks, pages, skip_rows, num_rows, uses_custom_row_bounds, chunked_read_size); // process each chunk. this is the part that would be externalized into multiple calls diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index 134eff54144..12b806ce788 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -45,4723 +45,28 @@ #include #include -template -using column_wrapper = - typename std::conditional, - cudf::test::strings_column_wrapper, - cudf::test::fixed_width_column_wrapper>::type; -using column = cudf::column; -using table = cudf::table; -using table_view = cudf::table_view; - -// Global environment for temporary files -auto const temp_env = static_cast( - ::testing::AddGlobalTestEnvironment(new cudf::test::TempDirTestEnvironment)); - -template -std::unique_ptr create_fixed_table(cudf::size_type num_columns, - cudf::size_type num_rows, - bool include_validity, - Elements elements) -{ - auto valids = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return i % 2 == 0 ? 
true : false; }); - std::vector> src_cols(num_columns); - for (int idx = 0; idx < num_columns; idx++) { - if (include_validity) { - src_cols[idx] = - cudf::test::fixed_width_column_wrapper(elements, elements + num_rows, valids); - } else { - src_cols[idx] = cudf::test::fixed_width_column_wrapper(elements, elements + num_rows); - } - } - std::vector> columns(num_columns); - std::transform(src_cols.begin(), - src_cols.end(), - columns.begin(), - [](cudf::test::fixed_width_column_wrapper& in) { - auto ret = in.release(); - // pre-cache the null count - [[maybe_unused]] auto const nulls = ret->has_nulls(); - return ret; - }); - return std::make_unique(std::move(columns)); -} - -template -std::unique_ptr create_random_fixed_table(cudf::size_type num_columns, - cudf::size_type num_rows, - bool include_validity) -{ - auto rand_elements = - cudf::detail::make_counting_transform_iterator(0, [](T i) { return rand(); }); - return create_fixed_table(num_columns, num_rows, include_validity, rand_elements); -} - -template -std::unique_ptr create_compressible_fixed_table(cudf::size_type num_columns, - cudf::size_type num_rows, - cudf::size_type period, - bool include_validity) -{ - auto compressible_elements = - cudf::detail::make_counting_transform_iterator(0, [period](T i) { return i / period; }); - return create_fixed_table(num_columns, num_rows, include_validity, compressible_elements); -} - -// this function replicates the "list_gen" function in -// python/cudf/cudf/tests/test_parquet.py -template -std::unique_ptr make_parquet_list_list_col( - int skip_rows, int num_rows, int lists_per_row, int list_size, bool include_validity) -{ - auto valids = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0 ? 1 : 0; }); - - // root list - std::vector row_offsets(num_rows + 1); - int row_offset_count = 0; - { - int offset = 0; - for (int idx = 0; idx < (num_rows) + 1; idx++) { - row_offsets[row_offset_count] = offset; - if (!include_validity || valids[idx]) { offset += lists_per_row; } - row_offset_count++; - } - } - cudf::test::fixed_width_column_wrapper offsets(row_offsets.begin(), - row_offsets.begin() + row_offset_count); - - // child list - std::vector child_row_offsets((num_rows * lists_per_row) + 1); - int child_row_offset_count = 0; - { - int offset = 0; - for (int idx = 0; idx < (num_rows * lists_per_row); idx++) { - int row_index = idx / lists_per_row; - if (include_validity && !valids[row_index]) { continue; } - - child_row_offsets[child_row_offset_count] = offset; - offset += list_size; - child_row_offset_count++; - } - child_row_offsets[child_row_offset_count++] = offset; - } - cudf::test::fixed_width_column_wrapper child_offsets( - child_row_offsets.begin(), child_row_offsets.begin() + child_row_offset_count); - - // child values - std::vector child_values(num_rows * lists_per_row * list_size); - T first_child_value_index = skip_rows * lists_per_row * list_size; - int child_value_count = 0; - { - for (int idx = 0; idx < (num_rows * lists_per_row * list_size); idx++) { - int row_index = idx / (lists_per_row * list_size); - - int val = first_child_value_index; - first_child_value_index++; - - if (include_validity && !valids[row_index]) { continue; } - - child_values[child_value_count] = val; - child_value_count++; - } - } - // validity by value instead of index - auto valids2 = cudf::detail::make_counting_transform_iterator( - 0, [list_size](auto i) { return (i % list_size) % 2 == 0 ? 1 : 0; }); - auto child_data = include_validity - ? 
cudf::test::fixed_width_column_wrapper( - child_values.begin(), child_values.begin() + child_value_count, valids2) - : cudf::test::fixed_width_column_wrapper( - child_values.begin(), child_values.begin() + child_value_count); - - int child_offsets_size = static_cast(child_offsets).size() - 1; - auto child = cudf::make_lists_column( - child_offsets_size, child_offsets.release(), child_data.release(), 0, rmm::device_buffer{}); - - int offsets_size = static_cast(offsets).size() - 1; - return include_validity - ? cudf::make_lists_column( - offsets_size, - offsets.release(), - std::move(child), - cudf::UNKNOWN_NULL_COUNT, - cudf::test::detail::make_null_mask(valids, valids + offsets_size)) - : cudf::make_lists_column( - offsets_size, offsets.release(), std::move(child), 0, rmm::device_buffer{}); -} - -// given a datasource pointing to a parquet file, read the footer -// of the file to populate the FileMetaData pointed to by file_meta_data. -// throws cudf::logic_error if the file or metadata is invalid. -void read_footer(const std::unique_ptr& source, - cudf::io::parquet::FileMetaData* file_meta_data) -{ - constexpr auto header_len = sizeof(cudf::io::parquet::file_header_s); - constexpr auto ender_len = sizeof(cudf::io::parquet::file_ender_s); - - const auto len = source->size(); - const auto header_buffer = source->host_read(0, header_len); - const auto header = - reinterpret_cast(header_buffer->data()); - const auto ender_buffer = source->host_read(len - ender_len, ender_len); - const auto ender = reinterpret_cast(ender_buffer->data()); - - // checks for valid header, footer, and file length - CUDF_EXPECTS(len > header_len + ender_len, "Incorrect data source"); - CUDF_EXPECTS(header->magic == cudf::io::parquet::parquet_magic && - ender->magic == cudf::io::parquet::parquet_magic, - "Corrupted header or footer"); - CUDF_EXPECTS(ender->footer_len != 0 && ender->footer_len <= (len - header_len - ender_len), - "Incorrect footer length"); - - // parquet files end with 4-byte footer_length and 4-byte magic == "PAR1" - // seek backwards from the end of the file (footer_length + 8 bytes of ender) - const auto footer_buffer = - source->host_read(len - ender->footer_len - ender_len, ender->footer_len); - cudf::io::parquet::CompactProtocolReader cp(footer_buffer->data(), ender->footer_len); - - // returns true on success - bool res = cp.read(file_meta_data); - CUDF_EXPECTS(res, "Cannot parse file metadata"); -} - -// returns the number of bits used for dictionary encoding data at the given page location. -// this assumes the data is uncompressed. -// throws cudf::logic_error if the page_loc data is invalid. -int read_dict_bits(const std::unique_ptr& source, - const cudf::io::parquet::PageLocation& page_loc) -{ - CUDF_EXPECTS(page_loc.offset > 0, "Cannot find page header"); - CUDF_EXPECTS(page_loc.compressed_page_size > 0, "Invalid page header length"); - - cudf::io::parquet::PageHeader page_hdr; - const auto page_buf = source->host_read(page_loc.offset, page_loc.compressed_page_size); - cudf::io::parquet::CompactProtocolReader cp(page_buf->data(), page_buf->size()); - bool res = cp.read(&page_hdr); - CUDF_EXPECTS(res, "Cannot parse page header"); - - // cp should be pointing at the start of page data now. the first byte - // should be the encoding bit size - return cp.getb(); -} - -// read column index from datasource at location indicated by chunk, -// parse and return as a ColumnIndex struct. -// throws cudf::logic_error if the chunk data is invalid. 
-cudf::io::parquet::ColumnIndex read_column_index(
-  const std::unique_ptr<cudf::io::datasource>& source, const cudf::io::parquet::ColumnChunk& chunk)
-{
-  CUDF_EXPECTS(chunk.column_index_offset > 0, "Cannot find column index");
-  CUDF_EXPECTS(chunk.column_index_length > 0, "Invalid column index length");
-
-  cudf::io::parquet::ColumnIndex colidx;
-  const auto ci_buf = source->host_read(chunk.column_index_offset, chunk.column_index_length);
-  cudf::io::parquet::CompactProtocolReader cp(ci_buf->data(), ci_buf->size());
-  bool res = cp.read(&colidx);
-  CUDF_EXPECTS(res, "Cannot parse column index");
-  return colidx;
-}
-
-// read offset index from datasource at location indicated by chunk,
-// parse and return as an OffsetIndex struct.
-// throws cudf::logic_error if the chunk data is invalid.
-cudf::io::parquet::OffsetIndex read_offset_index(
-  const std::unique_ptr<cudf::io::datasource>& source, const cudf::io::parquet::ColumnChunk& chunk)
-{
-  CUDF_EXPECTS(chunk.offset_index_offset > 0, "Cannot find offset index");
-  CUDF_EXPECTS(chunk.offset_index_length > 0, "Invalid offset index length");
-
-  cudf::io::parquet::OffsetIndex offidx;
-  const auto oi_buf = source->host_read(chunk.offset_index_offset, chunk.offset_index_length);
-  cudf::io::parquet::CompactProtocolReader cp(oi_buf->data(), oi_buf->size());
-  bool res = cp.read(&offidx);
-  CUDF_EXPECTS(res, "Cannot parse offset index");
-  return offidx;
-}
-
-// parse the statistics_blob on chunk and return as a Statistics struct.
-// throws cudf::logic_error if the chunk statistics_blob is invalid.
-cudf::io::parquet::Statistics parse_statistics(const cudf::io::parquet::ColumnChunk& chunk)
-{
-  auto& stats_blob = chunk.meta_data.statistics_blob;
-  CUDF_EXPECTS(stats_blob.size() > 0, "Invalid statistics length");
-
-  cudf::io::parquet::Statistics stats;
-  cudf::io::parquet::CompactProtocolReader cp(stats_blob.data(), stats_blob.size());
-  bool res = cp.read(&stats);
-  CUDF_EXPECTS(res, "Cannot parse column statistics");
-  return stats;
-}
-
-// read page header from datasource at location indicated by page_loc,
-// parse and return as a PageHeader struct.
-// throws cudf::logic_error if the page_loc data is invalid.
-cudf::io::parquet::PageHeader read_page_header(const std::unique_ptr<cudf::io::datasource>& source,
-                                               const cudf::io::parquet::PageLocation& page_loc)
-{
-  CUDF_EXPECTS(page_loc.offset > 0, "Cannot find page header");
-  CUDF_EXPECTS(page_loc.compressed_page_size > 0, "Invalid page header length");
-
-  cudf::io::parquet::PageHeader page_hdr;
-  const auto page_buf = source->host_read(page_loc.offset, page_loc.compressed_page_size);
-  cudf::io::parquet::CompactProtocolReader cp(page_buf->data(), page_buf->size());
-  bool res = cp.read(&page_hdr);
-  CUDF_EXPECTS(res, "Cannot parse page header");
-  return page_hdr;
-}
-
-// Base test fixture for tests
-struct ParquetWriterTest : public cudf::test::BaseFixture {
-};
-
-// Base test fixture for tests
-struct ParquetReaderTest : public cudf::test::BaseFixture {
-};
-
-// Base test fixture for "stress" tests
-struct ParquetWriterStressTest : public cudf::test::BaseFixture {
-};
-
-// Typed test fixture for numeric type tests
-template <typename T>
-struct ParquetWriterNumericTypeTest : public ParquetWriterTest {
-  auto type() { return cudf::data_type{cudf::type_to_id<T>()}; }
-};
-
-// Typed test fixture for comparable type tests
-template <typename T>
-struct ParquetWriterComparableTypeTest : public ParquetWriterTest {
-  auto type() { return cudf::data_type{cudf::type_to_id<T>()}; }
-};
-
-// Typed test fixture for timestamp type tests
-template <typename T>
-struct ParquetWriterChronoTypeTest : public ParquetWriterTest {
-  auto type() { return cudf::data_type{cudf::type_to_id<T>()}; }
-};
-
-// Typed test fixture for timestamp type tests
-template <typename T>
-struct ParquetWriterTimestampTypeTest : public ParquetWriterTest {
-  auto type() { return cudf::data_type{cudf::type_to_id<T>()}; }
-};
-
-// Typed test fixture for all types
-template <typename T>
-struct ParquetWriterSchemaTest : public ParquetWriterTest {
-  auto type() { return cudf::data_type{cudf::type_to_id<T>()}; }
-};
-
-// Declare typed test cases
-// TODO: Replace with `NumericTypes` when unsigned support is added. Issue #5352
-using SupportedTypes = cudf::test::Types<int8_t, int16_t, int32_t, int64_t, bool, float, double>;
-TYPED_TEST_SUITE(ParquetWriterNumericTypeTest, SupportedTypes);
-using ComparableAndFixedTypes =
-  cudf::test::Concat<cudf::test::ComparableTypes, cudf::test::FixedPointTypes>;
-TYPED_TEST_SUITE(ParquetWriterComparableTypeTest, ComparableAndFixedTypes);
-TYPED_TEST_SUITE(ParquetWriterChronoTypeTest, cudf::test::ChronoTypes);
-using SupportedTimestampTypes =
-  cudf::test::Types<cudf::timestamp_ms, cudf::timestamp_us, cudf::timestamp_ns>;
-TYPED_TEST_SUITE(ParquetWriterTimestampTypeTest, SupportedTimestampTypes);
-TYPED_TEST_SUITE(ParquetWriterSchemaTest, cudf::test::AllTypes);
-
-// Base test fixture for chunked writer tests
-struct ParquetChunkedWriterTest : public cudf::test::BaseFixture {
-};
-
-// Typed test fixture for numeric type tests
-template <typename T>
-struct ParquetChunkedWriterNumericTypeTest : public ParquetChunkedWriterTest {
-  auto type() { return cudf::data_type{cudf::type_to_id<T>()}; }
-};
-
-// Declare typed test cases
-TYPED_TEST_SUITE(ParquetChunkedWriterNumericTypeTest, SupportedTypes);
-
-// Base test fixture for size-parameterized tests
-class ParquetSizedTest : public ::testing::TestWithParam<int> {
+struct ParquetChunkedReaderTest : public cudf::test::BaseFixture {
 };
-// test the allowed bit widths for dictionary encoding
-// values chosen to trigger 1, 2, 3, 4, 5, 6, 8, 10, 12, 16, 20, and 24 bit dictionaries
-INSTANTIATE_TEST_SUITE_P(ParquetDictionaryTest,
-                         ParquetSizedTest,
-                         testing::Range(1, 25),
-                         testing::PrintToStringParamName());
-
-namespace {
-// Generates a vector of uniform random values of type T
-template <typename T>
-inline auto random_values(size_t size)
-{
-  std::vector<T> values(size);
-
-  using T1 = T;
-  using uniform_distribution =
-    typename std::conditional_t<std::is_same_v<T1, bool>,
-                                std::bernoulli_distribution,
-                                std::conditional_t<std::is_floating_point_v<T1>,
-                                                   std::uniform_real_distribution<T1>,
-                                                   std::uniform_int_distribution<T1>>>;
-
-  static constexpr auto seed = 0xf00d;
-  static std::mt19937 engine{seed};
-  static uniform_distribution dist{};
-  std::generate_n(values.begin(), size, [&]() { return T{dist(engine)}; });
-
-  return values;
-}
-
-}  // namespace
-
-TYPED_TEST(ParquetWriterNumericTypeTest, SingleColumn)
-{
-  auto sequence =
-    cudf::detail::make_counting_transform_iterator(0, [](auto i) { return TypeParam(i % 400); });
-  auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; });
-
-  constexpr auto num_rows = 800;
-  column_wrapper<TypeParam> col(sequence, sequence + num_rows, validity);
-
-  auto expected = table_view{{col}};
-
-  auto filepath = temp_env->get_temp_filepath("SingleColumn.parquet");
-  cudf::io::parquet_writer_options out_opts =
-    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected);
-  cudf::io::write_parquet(out_opts);
-
-  cudf::io::parquet_reader_options in_opts =
-    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
-  auto result = cudf::io::read_parquet(in_opts);
-
-  CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view());
-}
-
-TYPED_TEST(ParquetWriterNumericTypeTest, SingleColumnWithNulls)
-{
-  auto sequence =
-    cudf::detail::make_counting_transform_iterator(0, [](auto i) { return TypeParam(i); });
-  auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i % 2); });
-
-  constexpr auto num_rows = 100;
-  column_wrapper<TypeParam> col(sequence, sequence + num_rows, validity);
-
-  auto expected = table_view{{col}};
-
-  auto filepath = temp_env->get_temp_filepath("SingleColumnWithNulls.parquet");
-  cudf::io::parquet_writer_options out_opts =
-    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected);
-
cudf::io::write_parquet(out_opts); - - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); -} - -TYPED_TEST(ParquetWriterChronoTypeTest, Chronos) -{ - auto sequence = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return ((std::rand() / 10000) * 1000); }); - auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); - - constexpr auto num_rows = 100; - column_wrapper col( - sequence, sequence + num_rows, validity); - - auto expected = table_view{{col}}; - - auto filepath = temp_env->get_temp_filepath("Chronos.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); - cudf::io::write_parquet(out_opts); - - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) - .timestamp_type(this->type()); - auto result = cudf::io::read_parquet(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); -} - -TYPED_TEST(ParquetWriterChronoTypeTest, ChronosWithNulls) -{ - auto sequence = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return ((std::rand() / 10000) * 1000); }); - auto validity = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i > 30) && (i < 60); }); - - constexpr auto num_rows = 100; - column_wrapper col( - sequence, sequence + num_rows, validity); - - auto expected = table_view{{col}}; - - auto filepath = temp_env->get_temp_filepath("ChronosWithNulls.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); - cudf::io::write_parquet(out_opts); - - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) - .timestamp_type(this->type()); - auto result = cudf::io::read_parquet(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); -} - -TYPED_TEST(ParquetWriterTimestampTypeTest, TimestampOverflow) -{ - constexpr int64_t max = std::numeric_limits::max(); - auto sequence = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return max - i; }); - auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); - - constexpr auto num_rows = 100; - column_wrapper col( - sequence, sequence + num_rows, validity); - table_view expected({col}); - - auto filepath = temp_env->get_temp_filepath("ParquetTimestampOverflow.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); - cudf::io::write_parquet(out_opts); - - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) - .timestamp_type(this->type()); - auto result = cudf::io::read_parquet(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); -} - -TEST_F(ParquetWriterTest, MultiColumn) -{ - constexpr auto num_rows = 100000; - - // auto col0_data = random_values(num_rows); - auto col1_data = random_values(num_rows); - auto col2_data = random_values(num_rows); - auto col3_data = random_values(num_rows); - auto col4_data = random_values(num_rows); - auto col5_data = random_values(num_rows); - auto col6_vals = random_values(num_rows); - 
auto col7_vals = random_values(num_rows); - auto col8_vals = random_values(num_rows); - auto col6_data = cudf::detail::make_counting_transform_iterator(0, [col6_vals](auto i) { - return numeric::decimal32{col6_vals[i], numeric::scale_type{5}}; - }); - auto col7_data = cudf::detail::make_counting_transform_iterator(0, [col7_vals](auto i) { - return numeric::decimal64{col7_vals[i], numeric::scale_type{-5}}; - }); - auto col8_data = cudf::detail::make_counting_transform_iterator(0, [col8_vals](auto i) { - return numeric::decimal128{col8_vals[i], numeric::scale_type{-6}}; - }); - auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); - - // column_wrapper col0{ - // col0_data.begin(), col0_data.end(), validity}; - column_wrapper col1{col1_data.begin(), col1_data.end(), validity}; - column_wrapper col2{col2_data.begin(), col2_data.end(), validity}; - column_wrapper col3{col3_data.begin(), col3_data.end(), validity}; - column_wrapper col4{col4_data.begin(), col4_data.end(), validity}; - column_wrapper col5{col5_data.begin(), col5_data.end(), validity}; - column_wrapper col6{col6_data, col6_data + num_rows, validity}; - column_wrapper col7{col7_data, col7_data + num_rows, validity}; - column_wrapper col8{col8_data, col8_data + num_rows, validity}; - - auto expected = table_view{{col1, col2, col3, col4, col5, col6, col7, col8}}; - - cudf::io::table_input_metadata expected_metadata(expected); - // expected_metadata.column_metadata[0].set_name( "bools"); - expected_metadata.column_metadata[0].set_name("int8s"); - expected_metadata.column_metadata[1].set_name("int16s"); - expected_metadata.column_metadata[2].set_name("int32s"); - expected_metadata.column_metadata[3].set_name("floats"); - expected_metadata.column_metadata[4].set_name("doubles"); - expected_metadata.column_metadata[5].set_name("decimal32s").set_decimal_precision(10); - expected_metadata.column_metadata[6].set_name("decimal64s").set_decimal_precision(20); - expected_metadata.column_metadata[7].set_name("decimal128s").set_decimal_precision(40); - - auto filepath = temp_env->get_temp_filepath("MultiColumn.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .metadata(&expected_metadata); - cudf::io::write_parquet(out_opts); - - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); -} - -TEST_F(ParquetWriterTest, MultiColumnWithNulls) -{ - constexpr auto num_rows = 100; - - // auto col0_data = random_values(num_rows); - auto col1_data = random_values(num_rows); - auto col2_data = random_values(num_rows); - auto col3_data = random_values(num_rows); - auto col4_data = random_values(num_rows); - auto col5_data = random_values(num_rows); - auto col6_vals = random_values(num_rows); - auto col7_vals = random_values(num_rows); - auto col6_data = cudf::detail::make_counting_transform_iterator(0, [col6_vals](auto i) { - return numeric::decimal32{col6_vals[i], numeric::scale_type{-2}}; - }); - auto col7_data = cudf::detail::make_counting_transform_iterator(0, [col7_vals](auto i) { - return numeric::decimal64{col7_vals[i], numeric::scale_type{-8}}; - }); - // auto col0_mask = cudf::detail::make_counting_transform_iterator( - // 0, [](auto i) { return (i % 2); }); - auto 
col1_mask = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i < 10); }); - auto col2_mask = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); - auto col3_mask = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i == (num_rows - 1)); }); - auto col4_mask = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i >= 40 && i <= 60); }); - auto col5_mask = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i > 80); }); - auto col6_mask = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i % 5); }); - auto col7_mask = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i != 55); }); - - // column_wrapper col0{ - // col0_data.begin(), col0_data.end(), col0_mask}; - column_wrapper col1{col1_data.begin(), col1_data.end(), col1_mask}; - column_wrapper col2{col2_data.begin(), col2_data.end(), col2_mask}; - column_wrapper col3{col3_data.begin(), col3_data.end(), col3_mask}; - column_wrapper col4{col4_data.begin(), col4_data.end(), col4_mask}; - column_wrapper col5{col5_data.begin(), col5_data.end(), col5_mask}; - column_wrapper col6{col6_data, col6_data + num_rows, col6_mask}; - column_wrapper col7{col7_data, col7_data + num_rows, col7_mask}; - - auto expected = table_view{{/*col0, */ col1, col2, col3, col4, col5, col6, col7}}; - - cudf::io::table_input_metadata expected_metadata(expected); - // expected_metadata.column_names.emplace_back("bools"); - expected_metadata.column_metadata[0].set_name("int8s"); - expected_metadata.column_metadata[1].set_name("int16s"); - expected_metadata.column_metadata[2].set_name("int32s"); - expected_metadata.column_metadata[3].set_name("floats"); - expected_metadata.column_metadata[4].set_name("doubles"); - expected_metadata.column_metadata[5].set_name("decimal32s").set_decimal_precision(9); - expected_metadata.column_metadata[6].set_name("decimal64s").set_decimal_precision(20); - - auto filepath = temp_env->get_temp_filepath("MultiColumnWithNulls.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .metadata(&expected_metadata); - - cudf::io::write_parquet(out_opts); - - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); - // TODO: Need to be able to return metadata in tree form from reader so they can be compared. - // Unfortunately the closest thing to a hierarchical schema is column_name_info which does not - // have any tests for it c++ or python. 
- cudf::test::expect_metadata_equal(expected_metadata, result.metadata); -} - -TEST_F(ParquetWriterTest, Strings) -{ - std::vector strings{ - "Monday", "Wȅdnȅsday", "Friday", "Monday", "Friday", "Friday", "Friday", "Funday"}; - const auto num_rows = strings.size(); - - auto seq_col0 = random_values(num_rows); - auto seq_col2 = random_values(num_rows); - auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); - - column_wrapper col0{seq_col0.begin(), seq_col0.end(), validity}; - column_wrapper col1{strings.begin(), strings.end()}; - column_wrapper col2{seq_col2.begin(), seq_col2.end(), validity}; - - auto expected = table_view{{col0, col1, col2}}; - - cudf::io::table_input_metadata expected_metadata(expected); - expected_metadata.column_metadata[0].set_name("col_other"); - expected_metadata.column_metadata[1].set_name("col_string"); - expected_metadata.column_metadata[2].set_name("col_another"); - - auto filepath = temp_env->get_temp_filepath("Strings.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .metadata(&expected_metadata); - cudf::io::write_parquet(out_opts); - - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); -} - -TEST_F(ParquetWriterTest, StringsAsBinary) -{ - std::vector unicode_strings{ - "Monday", "Wȅdnȅsday", "Friday", "Monday", "Friday", "Friday", "Friday", "Funday"}; - std::vector ascii_strings{ - "Monday", "Wednesday", "Friday", "Monday", "Friday", "Friday", "Friday", "Funday"}; - - column_wrapper col0{ascii_strings.begin(), ascii_strings.end()}; - column_wrapper col1{unicode_strings.begin(), unicode_strings.end()}; - column_wrapper col2{ascii_strings.begin(), ascii_strings.end()}; - cudf::test::lists_column_wrapper col3{{'M', 'o', 'n', 'd', 'a', 'y'}, - {'W', 'e', 'd', 'n', 'e', 's', 'd', 'a', 'y'}, - {'F', 'r', 'i', 'd', 'a', 'y'}, - {'M', 'o', 'n', 'd', 'a', 'y'}, - {'F', 'r', 'i', 'd', 'a', 'y'}, - {'F', 'r', 'i', 'd', 'a', 'y'}, - {'F', 'r', 'i', 'd', 'a', 'y'}, - {'F', 'u', 'n', 'd', 'a', 'y'}}; - cudf::test::lists_column_wrapper col4{ - {'M', 'o', 'n', 'd', 'a', 'y'}, - {'W', -56, -123, 'd', 'n', -56, -123, 's', 'd', 'a', 'y'}, - {'F', 'r', 'i', 'd', 'a', 'y'}, - {'M', 'o', 'n', 'd', 'a', 'y'}, - {'F', 'r', 'i', 'd', 'a', 'y'}, - {'F', 'r', 'i', 'd', 'a', 'y'}, - {'F', 'r', 'i', 'd', 'a', 'y'}, - {'F', 'u', 'n', 'd', 'a', 'y'}}; - - auto write_tbl = table_view{{col0, col1, col2, col3, col4}}; - - cudf::io::table_input_metadata expected_metadata(write_tbl); - expected_metadata.column_metadata[0].set_name("col_single").set_output_as_binary(true); - expected_metadata.column_metadata[1].set_name("col_string").set_output_as_binary(true); - expected_metadata.column_metadata[2].set_name("col_another").set_output_as_binary(true); - expected_metadata.column_metadata[3].set_name("col_binary"); - expected_metadata.column_metadata[4].set_name("col_binary"); - - auto filepath = temp_env->get_temp_filepath("BinaryStrings.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, write_tbl) - .metadata(&expected_metadata); - cudf::io::write_parquet(out_opts); - - cudf::io::parquet_reader_options in_opts = - 
cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) - .set_column_schema( - {cudf::io::reader_column_schema().set_convert_binary_to_strings(false), - cudf::io::reader_column_schema().set_convert_binary_to_strings(false), - cudf::io::reader_column_schema().set_convert_binary_to_strings(false), - cudf::io::reader_column_schema().add_child(cudf::io::reader_column_schema()), - cudf::io::reader_column_schema().add_child(cudf::io::reader_column_schema())}); - auto result = cudf::io::read_parquet(in_opts); - auto expected = table_view{{col3, col4, col3, col3, col4}}; - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); -} - -TEST_F(ParquetWriterTest, SlicedTable) -{ - // This test checks for writing zero copy, offsetted views into existing cudf tables - - std::vector strings{ - "Monday", "Wȅdnȅsday", "Friday", "Monday", "Friday", "Friday", "Friday", "Funday"}; - const auto num_rows = strings.size(); - - auto seq_col0 = random_values(num_rows); - auto seq_col2 = random_values(num_rows); - auto validity = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 3 != 0; }); - - column_wrapper col0{seq_col0.begin(), seq_col0.end(), validity}; - column_wrapper col1{strings.begin(), strings.end()}; - column_wrapper col2{seq_col2.begin(), seq_col2.end(), validity}; - - using lcw = cudf::test::lists_column_wrapper; - lcw col3{{9, 8}, {7, 6, 5}, {}, {4}, {3, 2, 1, 0}, {20, 21, 22, 23, 24}, {}, {66, 666}}; - - // [[[NULL,2,NULL,4]], [[NULL,6,NULL], [8,9]]] - // [NULL, [[13],[14,15,16]], NULL] - // [NULL, [], NULL, [[]]] - // NULL - // [[[NULL,2,NULL,4]], [[NULL,6,NULL], [8,9]]] - // [NULL, [[13],[14,15,16]], NULL] - // [[[]]] - // [NULL, [], NULL, [[]]] - auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); - auto valids2 = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; }); - lcw col4{{ - {{{{1, 2, 3, 4}, valids}}, {{{5, 6, 7}, valids}, {8, 9}}}, - {{{{10, 11}, {12}}, {{13}, {14, 15, 16}}, {{17, 18}}}, valids}, - {{lcw{lcw{}}, lcw{}, lcw{}, lcw{lcw{}}}, valids}, - lcw{lcw{lcw{}}}, - {{{{1, 2, 3, 4}, valids}}, {{{5, 6, 7}, valids}, {8, 9}}}, - {{{{10, 11}, {12}}, {{13}, {14, 15, 16}}, {{17, 18}}}, valids}, - lcw{lcw{lcw{}}}, - {{lcw{lcw{}}, lcw{}, lcw{}, lcw{lcw{}}}, valids}, - }, - valids2}; - - // Struct column - auto ages_col = cudf::test::fixed_width_column_wrapper{ - {48, 27, 25, 31, 351, 351, 29, 15}, {1, 1, 1, 1, 1, 0, 1, 1}}; - - auto col5 = cudf::test::structs_column_wrapper{{ages_col}, {1, 1, 1, 1, 0, 1, 1, 1}}; - - // Struct/List mixed column - - // [] - // [NULL, 2, NULL] - // [4, 5] - // NULL - // [] - // [7, 8, 9] - // [10] - // [11, 12] - lcw land{{{}, {{1, 2, 3}, valids}, {4, 5}, {}, {}, {7, 8, 9}, {10}, {11, 12}}, valids2}; - - // [] - // [[1, 2, 3], [], [4, 5], [], [0, 6, 0]] - // [[7, 8], []] - // [[]] - // [[]] - // [[], [], []] - // [[10]] - // [[13, 14], [15]] - lcw flats{lcw{}, - {{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, - {{7, 8}, {}}, - lcw{lcw{}}, - lcw{lcw{}}, - lcw{lcw{}, lcw{}, lcw{}}, - {lcw{10}}, - {{13, 14}, {15}}}; - - auto struct_1 = cudf::test::structs_column_wrapper{land, flats}; - auto is_human = cudf::test::fixed_width_column_wrapper{ - {true, true, false, false, true, false, true, false}}; - auto col6 = cudf::test::structs_column_wrapper{{is_human, struct_1}}; - - auto expected = table_view({col0, col1, col2, col3, col4, col5, col6}); - - // auto expected_slice = expected; - 
auto expected_slice = cudf::slice(expected, {2, static_cast(num_rows) - 1}); - - cudf::io::table_input_metadata expected_metadata(expected_slice); - expected_metadata.column_metadata[0].set_name("col_other"); - expected_metadata.column_metadata[1].set_name("col_string"); - expected_metadata.column_metadata[2].set_name("col_another"); - expected_metadata.column_metadata[3].set_name("col_list"); - expected_metadata.column_metadata[4].set_name("col_multi_level_list"); - expected_metadata.column_metadata[5].set_name("col_struct"); - expected_metadata.column_metadata[5].set_name("col_struct_list"); - expected_metadata.column_metadata[6].child(0).set_name("human?"); - expected_metadata.column_metadata[6].child(1).set_name("particulars"); - expected_metadata.column_metadata[6].child(1).child(0).set_name("land"); - expected_metadata.column_metadata[6].child(1).child(1).set_name("flats"); - - auto filepath = temp_env->get_temp_filepath("SlicedTable.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected_slice) - .metadata(&expected_metadata); - cudf::io::write_parquet(out_opts); - - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected_slice, result.tbl->view()); - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); -} - -TEST_F(ParquetWriterTest, ListColumn) -{ - auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); - auto valids2 = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; }); - - using lcw = cudf::test::lists_column_wrapper; - - // [NULL, 2, NULL] - // [] - // [4, 5] - // NULL - lcw col0{{{{1, 2, 3}, valids}, {}, {4, 5}, {}}, valids2}; - - // [[1, 2, 3], [], [4, 5], [], [0, 6, 0]] - // [[7, 8]] - // [] - // [[]] - lcw col1{{{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, {{7, 8}}, lcw{}, lcw{lcw{}}}; - - // [[1, 2, 3], [], [4, 5], NULL, [0, 6, 0]] - // [[7, 8]] - // [] - // [[]] - lcw col2{{{{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, valids2}, {{7, 8}}, lcw{}, lcw{lcw{}}}; - - // [[1, 2, 3], [], [4, 5], NULL, [NULL, 6, NULL]] - // [[7, 8]] - // [] - // [[]] - using dlcw = cudf::test::lists_column_wrapper; - dlcw col3{{{{1., 2., 3.}, {}, {4., 5.}, {}, {{0., 6., 0.}, valids}}, valids2}, - {{7., 8.}}, - dlcw{}, - dlcw{dlcw{}}}; - - // TODO: uint16_t lists are not read properly in parquet reader - // [[1, 2, 3], [], [4, 5], NULL, [0, 6, 0]] - // [[7, 8]] - // [] - // NULL - // using ui16lcw = cudf::test::lists_column_wrapper; - // cudf::test::lists_column_wrapper col4{ - // {{{{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, valids2}, {{7, 8}}, ui16lcw{}, ui16lcw{ui16lcw{}}}, - // valids2}; - - // [[1, 2, 3], [], [4, 5], NULL, [NULL, 6, NULL]] - // [[7, 8]] - // [] - // NULL - lcw col5{ - {{{{1, 2, 3}, {}, {4, 5}, {}, {{0, 6, 0}, valids}}, valids2}, {{7, 8}}, lcw{}, lcw{lcw{}}}, - valids2}; - - using strlcw = cudf::test::lists_column_wrapper; - cudf::test::lists_column_wrapper col6{ - {{"Monday", "Monday", "Friday"}, {}, {"Monday", "Friday"}, {}, {"Sunday", "Funday"}}, - {{"bee", "sting"}}, - strlcw{}, - strlcw{strlcw{}}}; - - // [[[NULL,2,NULL,4]], [[NULL,6,NULL], [8,9]]] - // [NULL, [[13],[14,15,16]], NULL] - // [NULL, [], NULL, [[]]] - // NULL - lcw col7{{ - {{{{1, 2, 3, 4}, valids}}, {{{5, 6, 7}, valids}, {8, 9}}}, - {{{{10, 11}, {12}}, {{13}, {14, 15, 16}}, {{17, 18}}}, valids}, - 
{{lcw{lcw{}}, lcw{}, lcw{}, lcw{lcw{}}}, valids}, - lcw{lcw{lcw{}}}, - }, - valids2}; - - table_view expected({col0, col1, col2, col3, /* col4, */ col5, col6, col7}); - - cudf::io::table_input_metadata expected_metadata(expected); - expected_metadata.column_metadata[0].set_name("col_list_int_0"); - expected_metadata.column_metadata[1].set_name("col_list_list_int_1"); - expected_metadata.column_metadata[2].set_name("col_list_list_int_nullable_2"); - expected_metadata.column_metadata[3].set_name("col_list_list_nullable_double_nullable_3"); - // expected_metadata.column_metadata[0].set_name("col_list_list_uint16_4"); - expected_metadata.column_metadata[4].set_name("col_list_nullable_list_nullable_int_nullable_5"); - expected_metadata.column_metadata[5].set_name("col_list_list_string_6"); - expected_metadata.column_metadata[6].set_name("col_list_list_list_7"); - - auto filepath = temp_env->get_temp_filepath("ListColumn.parquet"); - auto out_opts = cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .metadata(&expected_metadata) - .compression(cudf::io::compression_type::NONE); - - cudf::io::write_parquet(out_opts); - - auto in_opts = cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); -} - -TEST_F(ParquetWriterTest, MultiIndex) +TEST_F(ParquetChunkedReaderTest, Test) { - constexpr auto num_rows = 100; - - auto col0_data = random_values(num_rows); - auto col1_data = random_values(num_rows); - auto col2_data = random_values(num_rows); - auto col3_data = random_values(num_rows); - auto col4_data = random_values(num_rows); - auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); - - column_wrapper col0{col0_data.begin(), col0_data.end(), validity}; - column_wrapper col1{col1_data.begin(), col1_data.end(), validity}; - column_wrapper col2{col2_data.begin(), col2_data.end(), validity}; - column_wrapper col3{col3_data.begin(), col3_data.end(), validity}; - column_wrapper col4{col4_data.begin(), col4_data.end(), validity}; - - auto expected = table_view{{col0, col1, col2, col3, col4}}; + std::mt19937 gen(6542); + std::bernoulli_distribution bn(0.7f); + // auto valids = + // cudf::detail::make_counting_transform_iterator(0, [&](int index) { return bn(gen); }); + auto values = thrust::make_counting_iterator(0); - cudf::io::table_input_metadata expected_metadata(expected); - expected_metadata.column_metadata[0].set_name("int8s"); - expected_metadata.column_metadata[1].set_name("int16s"); - expected_metadata.column_metadata[2].set_name("int32s"); - expected_metadata.column_metadata[3].set_name("floats"); - expected_metadata.column_metadata[4].set_name("doubles"); + constexpr cudf::size_type num_rows = 40000; + cudf::test::fixed_width_column_wrapper a(values, values + num_rows); + cudf::test::fixed_width_column_wrapper b(values, values + num_rows); - auto filepath = temp_env->get_temp_filepath("MultiIndex.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .metadata(&expected_metadata) - .key_value_metadata( - {{{"pandas", "\"index_columns\": [\"int8s\", \"int16s\"], \"column1\": [\"int32s\"]"}}}); - cudf::io::write_parquet(out_opts); + cudf::table_view t({a, b}); + cudf::io::parquet_writer_options opts = 
cudf::io::parquet_writer_options::builder( + cudf::io::sink_info{"/tmp/chunked_splits.parquet"}, t); + cudf::io::write_parquet(opts); cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) - .use_pandas_metadata(true) - .columns({"int32s", "floats", "doubles"}); + cudf::io::parquet_reader_options::builder(cudf::io::source_info{"/tmp/chunked_splits.parquet"}); auto result = cudf::io::read_parquet(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); -} - -TEST_F(ParquetWriterTest, HostBuffer) -{ - constexpr auto num_rows = 100 << 10; - const auto seq_col = random_values(num_rows); - const auto validity = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); - column_wrapper col{seq_col.begin(), seq_col.end(), validity}; - - const auto expected = table_view{{col}}; - - cudf::io::table_input_metadata expected_metadata(expected); - expected_metadata.column_metadata[0].set_name("col_other"); - - std::vector out_buffer; - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer), expected) - .metadata(&expected_metadata); - cudf::io::write_parquet(out_opts); - cudf::io::parquet_reader_options in_opts = cudf::io::parquet_reader_options::builder( - cudf::io::source_info(out_buffer.data(), out_buffer.size())); - const auto result = cudf::io::read_parquet(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); -} - -TEST_F(ParquetWriterTest, NonNullable) -{ - srand(31337); - auto expected = create_random_fixed_table(9, 9, false); - - auto filepath = temp_env->get_temp_filepath("NonNullable.parquet"); - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected); - cudf::io::write_parquet(args); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); -} - -TEST_F(ParquetWriterTest, Struct) -{ - // Struct> - - auto names = {"Samuel Vimes", - "Carrot Ironfoundersson", - "Angua von Uberwald", - "Cheery Littlebottom", - "Detritus", - "Mr Slant"}; - - // `Name` column has all valid values. 
- auto names_col = cudf::test::strings_column_wrapper{names.begin(), names.end()}; - - auto ages_col = - cudf::test::fixed_width_column_wrapper{{48, 27, 25, 31, 351, 351}, {1, 1, 1, 1, 1, 0}}; - - auto struct_1 = cudf::test::structs_column_wrapper{{names_col, ages_col}, {1, 1, 1, 1, 0, 1}}; - - auto is_human_col = cudf::test::fixed_width_column_wrapper{ - {true, true, false, false, false, false}, {1, 1, 0, 1, 1, 0}}; - - auto struct_2 = - cudf::test::structs_column_wrapper{{is_human_col, struct_1}, {0, 1, 1, 1, 1, 1}}.release(); - - auto expected = table_view({*struct_2}); - - auto filepath = temp_env->get_temp_filepath("Struct.parquet"); - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); - cudf::io::write_parquet(args); - - cudf::io::parquet_reader_options read_args = - cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath)); - cudf::io::read_parquet(read_args); + printf("result size: %d\n", result.tbl->num_rows()); } - -TEST_F(ParquetWriterTest, StructOfList) -{ - // Struct>, - // flats:List> - // > - // > - - auto weights_col = cudf::test::fixed_width_column_wrapper{1.1, 2.4, 5.3, 8.0, 9.6, 6.9}; - - auto ages_col = - cudf::test::fixed_width_column_wrapper{{48, 27, 25, 31, 351, 351}, {1, 1, 1, 1, 1, 0}}; - - auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); - auto valids2 = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; }); - - using lcw = cudf::test::lists_column_wrapper; - - // [] - // [NULL, 2, NULL] - // [4, 5] - // NULL - // [] - // [7, 8, 9] - lcw land_unit{{{}, {{1, 2, 3}, valids}, {4, 5}, {}, {}, {7, 8, 9}}, valids2}; - - // [] - // [[1, 2, 3], [], [4, 5], [], [0, 6, 0]] - // [[7, 8], []] - // [[]] - // [[]] - // [[], [], []] - lcw flats{lcw{}, - {{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, - {{7, 8}, {}}, - lcw{lcw{}}, - lcw{lcw{}}, - lcw{lcw{}, lcw{}, lcw{}}}; - - auto struct_1 = cudf::test::structs_column_wrapper{{weights_col, ages_col, land_unit, flats}, - {1, 1, 1, 1, 0, 1}}; - - auto is_human_col = cudf::test::fixed_width_column_wrapper{ - {true, true, false, false, false, false}, {1, 1, 0, 1, 1, 0}}; - - auto struct_2 = - cudf::test::structs_column_wrapper{{is_human_col, struct_1}, {0, 1, 1, 1, 1, 1}}.release(); - - auto expected = table_view({*struct_2}); - - cudf::io::table_input_metadata expected_metadata(expected); - expected_metadata.column_metadata[0].set_name("being"); - expected_metadata.column_metadata[0].child(0).set_name("human?"); - expected_metadata.column_metadata[0].child(1).set_name("particulars"); - expected_metadata.column_metadata[0].child(1).child(0).set_name("weight"); - expected_metadata.column_metadata[0].child(1).child(1).set_name("age"); - expected_metadata.column_metadata[0].child(1).child(2).set_name("land_unit"); - expected_metadata.column_metadata[0].child(1).child(3).set_name("flats"); - - auto filepath = temp_env->get_temp_filepath("StructOfList.parquet"); - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .metadata(&expected_metadata); - cudf::io::write_parquet(args); - - cudf::io::parquet_reader_options read_args = - cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath)); - const auto result = cudf::io::read_parquet(read_args); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); -} - 
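For the num_rows/skip_rows case the commit message at the top of this series describes (row bounds over a file mixing nested and non-nested columns), a minimal sketch follows. It is not part of the patch: it reuses the make_parquet_list_list_col and temp_env helpers defined earlier in this file, while the fixture/test name and the skip_rows()/num_rows() builder calls are assumed to be available in this version of the reader options.

// Hedged sketch, illustrative only: round-trip a flat column next to a list-of-list
// column, then read back a bounded window. The flat column must come back with the
// same row count as the list column.
TEST_F(ParquetReaderTest, MixedRowBoundsSketch)
{
  constexpr int num_rows = 1000;
  auto values = thrust::make_counting_iterator(0);
  cudf::test::fixed_width_column_wrapper<int> flat(values, values + num_rows);
  auto lists = make_parquet_list_list_col<int>(0, num_rows, 3, 2, false);
  cudf::table_view written({flat, *lists});

  auto filepath = temp_env->get_temp_filepath("MixedRowBoundsSketch.parquet");
  cudf::io::parquet_writer_options out_opts =
    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, written);
  cudf::io::write_parquet(out_opts);

  // read rows [200, 300); skip_rows/num_rows are assumed from the commit message
  cudf::io::parquet_reader_options in_opts =
    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath})
      .skip_rows(200)
      .num_rows(100);
  auto result = cudf::io::read_parquet(in_opts);

  // build the expected window the same way the python list_gen helper does
  cudf::test::fixed_width_column_wrapper<int> expected_flat(values + 200, values + 300);
  auto expected_lists = make_parquet_list_list_col<int>(200, 100, 3, 2, false);
  cudf::table_view expected({expected_flat, *expected_lists});

  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, result.tbl->view());
}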
-TEST_F(ParquetWriterTest, ListOfStruct) -{ - // List - // > - // > - - auto weight_col = cudf::test::fixed_width_column_wrapper{1.1, 2.4, 5.3, 8.0, 9.6, 6.9}; - - auto ages_col = - cudf::test::fixed_width_column_wrapper{{48, 27, 25, 31, 351, 351}, {1, 1, 1, 1, 1, 0}}; - - auto struct_1 = cudf::test::structs_column_wrapper{{weight_col, ages_col}, {1, 1, 1, 1, 0, 1}}; - - auto is_human_col = cudf::test::fixed_width_column_wrapper{ - {true, true, false, false, false, false}, {1, 1, 0, 1, 1, 0}}; - - auto struct_2 = - cudf::test::structs_column_wrapper{{is_human_col, struct_1}, {0, 1, 1, 1, 1, 1}}.release(); - - auto list_offsets_column = - cudf::test::fixed_width_column_wrapper{0, 2, 5, 5, 6}.release(); - auto num_list_rows = list_offsets_column->size() - 1; - - auto list_col = cudf::make_lists_column(num_list_rows, - std::move(list_offsets_column), - std::move(struct_2), - cudf::UNKNOWN_NULL_COUNT, - {}); - - auto expected = table_view({*list_col}); - - cudf::io::table_input_metadata expected_metadata(expected); - expected_metadata.column_metadata[0].set_name("family"); - expected_metadata.column_metadata[0].child(1).child(0).set_name("human?"); - expected_metadata.column_metadata[0].child(1).child(1).set_name("particulars"); - expected_metadata.column_metadata[0].child(1).child(1).child(0).set_name("weight"); - expected_metadata.column_metadata[0].child(1).child(1).child(1).set_name("age"); - - auto filepath = temp_env->get_temp_filepath("ListOfStruct.parquet"); - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .metadata(&expected_metadata); - cudf::io::write_parquet(args); - - cudf::io::parquet_reader_options read_args = - cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath)); - const auto result = cudf::io::read_parquet(read_args); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); -} - -// custom data sink that supports device writes. uses plain file io. 
-class custom_test_data_sink : public cudf::io::data_sink { - public: - explicit custom_test_data_sink(std::string const& filepath) - { - outfile_.open(filepath, std::ios::out | std::ios::binary | std::ios::trunc); - CUDF_EXPECTS(outfile_.is_open(), "Cannot open output file"); - } - - virtual ~custom_test_data_sink() { flush(); } - - void host_write(void const* data, size_t size) override - { - outfile_.write(static_cast(data), size); - } - - [[nodiscard]] bool supports_device_write() const override { return true; } - - void device_write(void const* gpu_data, size_t size, rmm::cuda_stream_view stream) override - { - this->device_write_async(gpu_data, size, stream).get(); - } - - std::future device_write_async(void const* gpu_data, - size_t size, - rmm::cuda_stream_view stream) override - { - return std::async(std::launch::deferred, [=] { - char* ptr = nullptr; - CUDF_CUDA_TRY(cudaMallocHost(&ptr, size)); - CUDF_CUDA_TRY(cudaMemcpyAsync(ptr, gpu_data, size, cudaMemcpyDeviceToHost, stream.value())); - stream.synchronize(); - outfile_.write(ptr, size); - CUDF_CUDA_TRY(cudaFreeHost(ptr)); - }); - } - - void flush() override { outfile_.flush(); } - - size_t bytes_written() override { return outfile_.tellp(); } - - private: - std::ofstream outfile_; -}; - -TEST_F(ParquetWriterTest, CustomDataSink) -{ - auto filepath = temp_env->get_temp_filepath("CustomDataSink.parquet"); - custom_test_data_sink custom_sink(filepath); - - srand(31337); - auto expected = create_random_fixed_table(5, 10, false); - - // write out using the custom sink - { - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); - cudf::io::write_parquet(args); - } - - // write out using a memmapped sink - std::vector buf_sink; - { - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&buf_sink}, *expected); - cudf::io::write_parquet(args); - } - - // read them back in and make sure everything matches - - cudf::io::parquet_reader_options custom_args = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto custom_tbl = cudf::io::read_parquet(custom_args); - CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); - - cudf::io::parquet_reader_options buf_args = cudf::io::parquet_reader_options::builder( - cudf::io::source_info{buf_sink.data(), buf_sink.size()}); - auto buf_tbl = cudf::io::read_parquet(buf_args); - CUDF_TEST_EXPECT_TABLES_EQUAL(buf_tbl.tbl->view(), expected->view()); -} - -TEST_F(ParquetWriterTest, DeviceWriteLargeishFile) -{ - auto filepath = temp_env->get_temp_filepath("DeviceWriteLargeishFile.parquet"); - custom_test_data_sink custom_sink(filepath); - - // exercises multiple rowgroups - srand(31337); - auto expected = create_random_fixed_table(4, 4 * 1024 * 1024, false); - - // write out using the custom sink (which uses device writes) - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); - cudf::io::write_parquet(args); - - cudf::io::parquet_reader_options custom_args = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto custom_tbl = cudf::io::read_parquet(custom_args); - CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); -} - -TEST_F(ParquetWriterTest, PartitionedWrite) -{ - auto source = create_compressible_fixed_table(16, 4 * 1024 * 1024, 1000, false); - - auto filepath1 = 
temp_env->get_temp_filepath("PartitionedWrite1.parquet"); - auto filepath2 = temp_env->get_temp_filepath("PartitionedWrite2.parquet"); - - auto partition1 = cudf::io::partition_info{10, 1024 * 1024}; - auto partition2 = cudf::io::partition_info{20 * 1024 + 7, 3 * 1024 * 1024}; - - auto expected1 = - cudf::slice(*source, {partition1.start_row, partition1.start_row + partition1.num_rows}); - auto expected2 = - cudf::slice(*source, {partition2.start_row, partition2.start_row + partition2.num_rows}); - - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder( - cudf::io::sink_info(std::vector{filepath1, filepath2}), *source) - .partitions({partition1, partition2}) - .compression(cudf::io::compression_type::NONE); - cudf::io::write_parquet(args); - - auto result1 = cudf::io::read_parquet( - cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath1))); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected1, result1.tbl->view()); - - auto result2 = cudf::io::read_parquet( - cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath2))); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected2, result2.tbl->view()); -} - -TEST_F(ParquetWriterTest, PartitionedWriteEmptyPartitions) -{ - auto source = create_random_fixed_table(4, 4, false); - - auto filepath1 = temp_env->get_temp_filepath("PartitionedWrite1.parquet"); - auto filepath2 = temp_env->get_temp_filepath("PartitionedWrite2.parquet"); - - auto partition1 = cudf::io::partition_info{1, 0}; - auto partition2 = cudf::io::partition_info{1, 0}; - - auto expected1 = - cudf::slice(*source, {partition1.start_row, partition1.start_row + partition1.num_rows}); - auto expected2 = - cudf::slice(*source, {partition2.start_row, partition2.start_row + partition2.num_rows}); - - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder( - cudf::io::sink_info(std::vector{filepath1, filepath2}), *source) - .partitions({partition1, partition2}) - .compression(cudf::io::compression_type::NONE); - cudf::io::write_parquet(args); - - auto result1 = cudf::io::read_parquet( - cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath1))); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected1, result1.tbl->view()); - - auto result2 = cudf::io::read_parquet( - cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath2))); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected2, result2.tbl->view()); -} - -TEST_F(ParquetWriterTest, PartitionedWriteEmptyColumns) -{ - auto source = create_random_fixed_table(0, 4, false); - - auto filepath1 = temp_env->get_temp_filepath("PartitionedWrite1.parquet"); - auto filepath2 = temp_env->get_temp_filepath("PartitionedWrite2.parquet"); - - auto partition1 = cudf::io::partition_info{1, 0}; - auto partition2 = cudf::io::partition_info{1, 0}; - - auto expected1 = - cudf::slice(*source, {partition1.start_row, partition1.start_row + partition1.num_rows}); - auto expected2 = - cudf::slice(*source, {partition2.start_row, partition2.start_row + partition2.num_rows}); - - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder( - cudf::io::sink_info(std::vector{filepath1, filepath2}), *source) - .partitions({partition1, partition2}) - .compression(cudf::io::compression_type::NONE); - cudf::io::write_parquet(args); - - auto result1 = cudf::io::read_parquet( - cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath1))); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected1, result1.tbl->view()); - - auto result2 = cudf::io::read_parquet( - 
cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath2))); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected2, result2.tbl->view()); -} - -template -std::string create_parquet_file(int num_cols) -{ - srand(31337); - auto const table = create_random_fixed_table(num_cols, 10, true); - auto const filepath = - temp_env->get_temp_filepath(typeid(T).name() + std::to_string(num_cols) + ".parquet"); - cudf::io::parquet_writer_options const out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, table->view()); - cudf::io::write_parquet(out_opts); - return filepath; -} - -TEST_F(ParquetWriterTest, MultipleMismatchedSources) -{ - auto const int5file = create_parquet_file(5); - { - auto const float5file = create_parquet_file(5); - std::vector files{int5file, float5file}; - cudf::io::parquet_reader_options const read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{files}); - EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error); - } - { - auto const int10file = create_parquet_file(10); - std::vector files{int5file, int10file}; - cudf::io::parquet_reader_options const read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{files}); - EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error); - } -} - -TEST_F(ParquetWriterTest, Slice) -{ - auto col = - cudf::test::fixed_width_column_wrapper{{1, 2, 3, 4, 5}, {true, true, true, false, true}}; - std::vector indices{2, 5}; - std::vector result = cudf::slice(col, indices); - cudf::table_view tbl{result}; - - auto filepath = temp_env->get_temp_filepath("Slice.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl); - cudf::io::write_parquet(out_opts); - - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto read_table = cudf::io::read_parquet(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(read_table.tbl->view(), tbl); -} - -TEST_F(ParquetChunkedWriterTest, SingleTable) -{ - srand(31337); - auto table1 = create_random_fixed_table(5, 5, true); - - auto filepath = temp_env->get_temp_filepath("ChunkedSingle.parquet"); - cudf::io::chunked_parquet_writer_options args = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); - cudf::io::parquet_chunked_writer(args).write(*table1); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *table1); -} - -TEST_F(ParquetChunkedWriterTest, SimpleTable) -{ - srand(31337); - auto table1 = create_random_fixed_table(5, 5, true); - auto table2 = create_random_fixed_table(5, 5, true); - - auto full_table = cudf::concatenate(std::vector({*table1, *table2})); - - auto filepath = temp_env->get_temp_filepath("ChunkedSimple.parquet"); - cudf::io::chunked_parquet_writer_options args = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); - cudf::io::parquet_chunked_writer(args).write(*table1).write(*table2); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); -} - -TEST_F(ParquetChunkedWriterTest, LargeTables) -{ - srand(31337); - auto table1 = 
create_random_fixed_table(512, 4096, true); - auto table2 = create_random_fixed_table(512, 8192, true); - - auto full_table = cudf::concatenate(std::vector({*table1, *table2})); - - auto filepath = temp_env->get_temp_filepath("ChunkedLarge.parquet"); - cudf::io::chunked_parquet_writer_options args = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); - auto md = cudf::io::parquet_chunked_writer(args).write(*table1).write(*table2).close(); - CUDF_EXPECTS(!md, "The return value should be null."); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); -} - -TEST_F(ParquetChunkedWriterTest, ManyTables) -{ - srand(31337); - std::vector> tables; - std::vector table_views; - constexpr int num_tables = 96; - for (int idx = 0; idx < num_tables; idx++) { - auto tbl = create_random_fixed_table(16, 64, true); - table_views.push_back(*tbl); - tables.push_back(std::move(tbl)); - } - - auto expected = cudf::concatenate(table_views); - - auto filepath = temp_env->get_temp_filepath("ChunkedManyTables.parquet"); - cudf::io::chunked_parquet_writer_options args = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); - cudf::io::parquet_chunked_writer writer(args); - std::for_each(table_views.begin(), table_views.end(), [&writer](table_view const& tbl) { - writer.write(tbl); - }); - auto md = writer.close({"dummy/path"}); - CUDF_EXPECTS(md, "The returned metadata should not be null."); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); -} - -TEST_F(ParquetChunkedWriterTest, Strings) -{ - std::vector> cols; - - bool mask1[] = {true, true, false, true, true, true, true}; - std::vector h_strings1{"four", "score", "and", "seven", "years", "ago", "abcdefgh"}; - cudf::test::strings_column_wrapper strings1(h_strings1.begin(), h_strings1.end(), mask1); - cols.push_back(strings1.release()); - cudf::table tbl1(std::move(cols)); - - bool mask2[] = {false, true, true, true, true, true, true}; - std::vector h_strings2{"ooooo", "ppppppp", "fff", "j", "cccc", "bbb", "zzzzzzzzzzz"}; - cudf::test::strings_column_wrapper strings2(h_strings2.begin(), h_strings2.end(), mask2); - cols.push_back(strings2.release()); - cudf::table tbl2(std::move(cols)); - - auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); - - auto filepath = temp_env->get_temp_filepath("ChunkedStrings.parquet"); - cudf::io::chunked_parquet_writer_options args = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); - cudf::io::parquet_chunked_writer(args).write(tbl1).write(tbl2); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); -} - -TEST_F(ParquetChunkedWriterTest, ListColumn) -{ - auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); - auto valids2 = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; }); - - using lcw = cudf::test::lists_column_wrapper; - - // COL0 (Same nullability) ==================== - // [NULL, 2, NULL] - // [] - // [4, 5] - 
// NULL - lcw col0_tbl0{{{{1, 2, 3}, valids}, {}, {4, 5}, {}}, valids2}; - - // [7, 8, 9] - // [] - // [NULL, 11] - // NULL - lcw col0_tbl1{{{7, 8, 9}, {}, {{10, 11}, valids}, {}}, valids2}; - - // COL1 (Nullability different in different chunks, test of merging nullability in writer) - // [NULL, 2, NULL] - // [] - // [4, 5] - // [] - lcw col1_tbl0{{{1, 2, 3}, valids}, {}, {4, 5}, {}}; - - // [7, 8, 9] - // [] - // [10, 11] - // NULL - lcw col1_tbl1{{{7, 8, 9}, {}, {10, 11}, {}}, valids2}; - - // COL2 (non-nested columns to test proper schema construction) - size_t num_rows_tbl0 = static_cast(col0_tbl0).size(); - size_t num_rows_tbl1 = static_cast(col0_tbl1).size(); - auto seq_col0 = random_values(num_rows_tbl0); - auto seq_col1 = random_values(num_rows_tbl1); - - column_wrapper col2_tbl0{seq_col0.begin(), seq_col0.end(), valids}; - column_wrapper col2_tbl1{seq_col1.begin(), seq_col1.end(), valids2}; - - auto tbl0 = table_view({col0_tbl0, col1_tbl0, col2_tbl0}); - auto tbl1 = table_view({col0_tbl1, col1_tbl1, col2_tbl1}); - - auto expected = cudf::concatenate(std::vector({tbl0, tbl1})); - - auto filepath = temp_env->get_temp_filepath("ChunkedLists.parquet"); - cudf::io::chunked_parquet_writer_options args = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); - cudf::io::parquet_chunked_writer(args).write(tbl0).write(tbl1); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); -} - -TEST_F(ParquetChunkedWriterTest, ListOfStruct) -{ - // Table 1 - auto weight_1 = cudf::test::fixed_width_column_wrapper{{57.5, 51.1, 15.3}}; - auto ages_1 = cudf::test::fixed_width_column_wrapper{{30, 27, 5}}; - auto struct_1_1 = cudf::test::structs_column_wrapper{weight_1, ages_1}; - auto is_human_1 = cudf::test::fixed_width_column_wrapper{{true, true, false}}; - auto struct_2_1 = cudf::test::structs_column_wrapper{{is_human_1, struct_1_1}}; - - auto list_offsets_column_1 = - cudf::test::fixed_width_column_wrapper{0, 2, 3, 3}.release(); - auto num_list_rows_1 = list_offsets_column_1->size() - 1; - - auto list_col_1 = cudf::make_lists_column(num_list_rows_1, - std::move(list_offsets_column_1), - struct_2_1.release(), - cudf::UNKNOWN_NULL_COUNT, - {}); - - auto table_1 = table_view({*list_col_1}); - - // Table 2 - auto weight_2 = cudf::test::fixed_width_column_wrapper{{1.1, -1.0, -1.0}}; - auto ages_2 = cudf::test::fixed_width_column_wrapper{{31, 351, 351}, {1, 1, 0}}; - auto struct_1_2 = cudf::test::structs_column_wrapper{{weight_2, ages_2}, {1, 0, 1}}; - auto is_human_2 = cudf::test::fixed_width_column_wrapper{{false, false, false}, {1, 1, 0}}; - auto struct_2_2 = cudf::test::structs_column_wrapper{{is_human_2, struct_1_2}}; - - auto list_offsets_column_2 = - cudf::test::fixed_width_column_wrapper{0, 1, 2, 3}.release(); - auto num_list_rows_2 = list_offsets_column_2->size() - 1; - - auto list_col_2 = cudf::make_lists_column(num_list_rows_2, - std::move(list_offsets_column_2), - struct_2_2.release(), - cudf::UNKNOWN_NULL_COUNT, - {}); - - auto table_2 = table_view({*list_col_2}); - - auto full_table = cudf::concatenate(std::vector({table_1, table_2})); - - cudf::io::table_input_metadata expected_metadata(table_1); - expected_metadata.column_metadata[0].set_name("family"); - expected_metadata.column_metadata[0].child(1).set_nullability(false); - 
expected_metadata.column_metadata[0].child(1).child(0).set_name("human?"); - expected_metadata.column_metadata[0].child(1).child(1).set_name("particulars"); - expected_metadata.column_metadata[0].child(1).child(1).child(0).set_name("weight"); - expected_metadata.column_metadata[0].child(1).child(1).child(1).set_name("age"); - - auto filepath = temp_env->get_temp_filepath("ChunkedListOfStruct.parquet"); - cudf::io::chunked_parquet_writer_options args = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); - args.set_metadata(&expected_metadata); - cudf::io::parquet_chunked_writer(args).write(table_1).write(table_2); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *full_table); - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); -} - -TEST_F(ParquetChunkedWriterTest, ListOfStructOfStructOfListOfList) -{ - auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); - auto valids2 = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; }); - - using lcw = cudf::test::lists_column_wrapper; - - // Table 1 =========================== - - // [] - // [NULL, 2, NULL] - // [4, 5] - // NULL - lcw land_1{{{}, {{1, 2, 3}, valids}, {4, 5}, {}}, valids2}; - - // [] - // [[1, 2, 3], [], [4, 5], [], [0, 6, 0]] - // [[7, 8], []] - // [[]] - lcw flats_1{lcw{}, {{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, {{7, 8}, {}}, lcw{lcw{}}}; - - auto weight_1 = cudf::test::fixed_width_column_wrapper{{57.5, 51.1, 15.3, 1.1}}; - auto ages_1 = cudf::test::fixed_width_column_wrapper{{30, 27, 5, 31}}; - auto struct_1_1 = cudf::test::structs_column_wrapper{weight_1, ages_1, land_1, flats_1}; - auto is_human_1 = cudf::test::fixed_width_column_wrapper{{true, true, false, false}}; - auto struct_2_1 = cudf::test::structs_column_wrapper{{is_human_1, struct_1_1}}; - - auto list_offsets_column_1 = - cudf::test::fixed_width_column_wrapper{0, 2, 3, 4}.release(); - auto num_list_rows_1 = list_offsets_column_1->size() - 1; - - auto list_col_1 = cudf::make_lists_column(num_list_rows_1, - std::move(list_offsets_column_1), - struct_2_1.release(), - cudf::UNKNOWN_NULL_COUNT, - {}); - - auto table_1 = table_view({*list_col_1}); - - // Table 2 =========================== - - // [] - // [7, 8, 9] - lcw land_2{{}, {7, 8, 9}}; - - // [[]] - // [[], [], []] - lcw flats_2{lcw{lcw{}}, lcw{lcw{}, lcw{}, lcw{}}}; - - auto weight_2 = cudf::test::fixed_width_column_wrapper{{-1.0, -1.0}}; - auto ages_2 = cudf::test::fixed_width_column_wrapper{{351, 351}, {1, 0}}; - auto struct_1_2 = cudf::test::structs_column_wrapper{{weight_2, ages_2, land_2, flats_2}, {0, 1}}; - auto is_human_2 = cudf::test::fixed_width_column_wrapper{{false, false}, {1, 0}}; - auto struct_2_2 = cudf::test::structs_column_wrapper{{is_human_2, struct_1_2}}; - - auto list_offsets_column_2 = - cudf::test::fixed_width_column_wrapper{0, 1, 2}.release(); - auto num_list_rows_2 = list_offsets_column_2->size() - 1; - - auto list_col_2 = cudf::make_lists_column(num_list_rows_2, - std::move(list_offsets_column_2), - struct_2_2.release(), - cudf::UNKNOWN_NULL_COUNT, - {}); - - auto table_2 = table_view({*list_col_2}); - - auto full_table = cudf::concatenate(std::vector({table_1, table_2})); - - cudf::io::table_input_metadata expected_metadata(table_1); - expected_metadata.column_metadata[0].set_name("family"); - 
expected_metadata.column_metadata[0].child(1).set_nullability(false); - expected_metadata.column_metadata[0].child(1).child(0).set_name("human?"); - expected_metadata.column_metadata[0].child(1).child(1).set_name("particulars"); - expected_metadata.column_metadata[0].child(1).child(1).child(0).set_name("weight"); - expected_metadata.column_metadata[0].child(1).child(1).child(1).set_name("age"); - expected_metadata.column_metadata[0].child(1).child(1).child(2).set_name("land_unit"); - expected_metadata.column_metadata[0].child(1).child(1).child(3).set_name("flats"); - - auto filepath = temp_env->get_temp_filepath("ListOfStructOfStructOfListOfList.parquet"); - cudf::io::chunked_parquet_writer_options args = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); - args.set_metadata(&expected_metadata); - cudf::io::parquet_chunked_writer(args).write(table_1).write(table_2); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *full_table); - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); - - // We specifically mentioned in input schema that struct_2 is non-nullable across chunked calls. - auto result_parent_list = result.tbl->get_column(0); - auto result_struct_2 = result_parent_list.child(cudf::lists_column_view::child_column_index); - EXPECT_EQ(result_struct_2.nullable(), false); -} - -TEST_F(ParquetChunkedWriterTest, MismatchedTypes) -{ - srand(31337); - auto table1 = create_random_fixed_table(4, 4, true); - auto table2 = create_random_fixed_table(4, 4, true); - - auto filepath = temp_env->get_temp_filepath("ChunkedMismatchedTypes.parquet"); - cudf::io::chunked_parquet_writer_options args = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); - cudf::io::parquet_chunked_writer writer(args); - writer.write(*table1); - EXPECT_THROW(writer.write(*table2), cudf::logic_error); - writer.close(); -} - -TEST_F(ParquetChunkedWriterTest, ChunkedWriteAfterClosing) -{ - srand(31337); - auto table = create_random_fixed_table(4, 4, true); - - auto filepath = temp_env->get_temp_filepath("ChunkedWriteAfterClosing.parquet"); - cudf::io::chunked_parquet_writer_options args = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); - cudf::io::parquet_chunked_writer writer(args); - writer.write(*table).close(); - EXPECT_THROW(writer.write(*table), cudf::logic_error); -} - -TEST_F(ParquetChunkedWriterTest, ReadingUnclosedFile) -{ - srand(31337); - auto table = create_random_fixed_table(4, 4, true); - - auto filepath = temp_env->get_temp_filepath("ReadingUnclosedFile.parquet"); - cudf::io::chunked_parquet_writer_options args = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); - cudf::io::parquet_chunked_writer writer(args); - writer.write(*table); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error); -} - -TEST_F(ParquetChunkedWriterTest, MismatchedStructure) -{ - srand(31337); - auto table1 = create_random_fixed_table(4, 4, true); - auto table2 = create_random_fixed_table(3, 4, true); - - auto filepath = temp_env->get_temp_filepath("ChunkedMismatchedStructure.parquet"); - cudf::io::chunked_parquet_writer_options args = - 
cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); - cudf::io::parquet_chunked_writer writer(args); - writer.write(*table1); - EXPECT_THROW(writer.write(*table2), cudf::logic_error); - writer.close(); -} - -TEST_F(ParquetChunkedWriterTest, MismatchedStructureList) -{ - auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); - auto valids2 = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; }); - - using lcw = cudf::test::lists_column_wrapper; - - // COL0 (mismatched depth) ==================== - // [NULL, 2, NULL] - // [] - // [4, 5] - // NULL - lcw col00{{{{1, 2, 3}, valids}, {}, {4, 5}, {}}, valids2}; - - // [[1, 2, 3], [], [4, 5], [], [0, 6, 0]] - // [[7, 8]] - // [] - // [[]] - lcw col01{{{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, {{7, 8}}, lcw{}, lcw{lcw{}}}; - - // COL2 (non-nested columns to test proper schema construction) - size_t num_rows = static_cast(col00).size(); - auto seq_col0 = random_values(num_rows); - auto seq_col1 = random_values(num_rows); - - column_wrapper col10{seq_col0.begin(), seq_col0.end(), valids}; - column_wrapper col11{seq_col1.begin(), seq_col1.end(), valids2}; - - auto tbl0 = table_view({col00, col10}); - auto tbl1 = table_view({col01, col11}); - - auto filepath = temp_env->get_temp_filepath("ChunkedLists.parquet"); - cudf::io::chunked_parquet_writer_options args = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); - cudf::io::parquet_chunked_writer writer(args); - writer.write(tbl0); - EXPECT_THROW(writer.write(tbl1), cudf::logic_error); -} - -TEST_F(ParquetChunkedWriterTest, DifferentNullability) -{ - srand(31337); - auto table1 = create_random_fixed_table(5, 5, true); - auto table2 = create_random_fixed_table(5, 5, false); - - auto full_table = cudf::concatenate(std::vector({*table1, *table2})); - - auto filepath = temp_env->get_temp_filepath("ChunkedNullable.parquet"); - cudf::io::chunked_parquet_writer_options args = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); - cudf::io::parquet_chunked_writer(args).write(*table1).write(*table2); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); -} - -TEST_F(ParquetChunkedWriterTest, DifferentNullabilityStruct) -{ - // Struct, - // age:int - // > (nullable) - // > (non-nullable) - - // Table 1: is_human and struct_1 are non-nullable but should be nullable when read back. - auto weight_1 = cudf::test::fixed_width_column_wrapper{{57.5, 51.1, 15.3}}; - auto ages_1 = cudf::test::fixed_width_column_wrapper{{30, 27, 5}}; - auto struct_1_1 = cudf::test::structs_column_wrapper{weight_1, ages_1}; - auto is_human_1 = cudf::test::fixed_width_column_wrapper{{true, true, false}}; - auto struct_2_1 = cudf::test::structs_column_wrapper{{is_human_1, struct_1_1}}; - auto table_1 = cudf::table_view({struct_2_1}); - - // Table 2: struct_1 and is_human are nullable now so if we hadn't assumed worst case (nullable) - // when writing table_1, we would have wrong pages for it. 
- auto weight_2 = cudf::test::fixed_width_column_wrapper{{1.1, -1.0, -1.0}}; - auto ages_2 = cudf::test::fixed_width_column_wrapper{{31, 351, 351}, {1, 1, 0}}; - auto struct_1_2 = cudf::test::structs_column_wrapper{{weight_2, ages_2}, {1, 0, 1}}; - auto is_human_2 = cudf::test::fixed_width_column_wrapper{{false, false, false}, {1, 1, 0}}; - auto struct_2_2 = cudf::test::structs_column_wrapper{{is_human_2, struct_1_2}}; - auto table_2 = cudf::table_view({struct_2_2}); - - auto full_table = cudf::concatenate(std::vector({table_1, table_2})); - - cudf::io::table_input_metadata expected_metadata(table_1); - expected_metadata.column_metadata[0].set_name("being"); - expected_metadata.column_metadata[0].child(0).set_name("human?"); - expected_metadata.column_metadata[0].child(1).set_name("particulars"); - expected_metadata.column_metadata[0].child(1).child(0).set_name("weight"); - expected_metadata.column_metadata[0].child(1).child(1).set_name("age"); - - auto filepath = temp_env->get_temp_filepath("ChunkedNullableStruct.parquet"); - cudf::io::chunked_parquet_writer_options args = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); - args.set_metadata(&expected_metadata); - cudf::io::parquet_chunked_writer(args).write(table_1).write(table_2); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *full_table); - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); -} - -TEST_F(ParquetChunkedWriterTest, ForcedNullability) -{ - srand(31337); - auto table1 = create_random_fixed_table(5, 5, false); - auto table2 = create_random_fixed_table(5, 5, false); - - auto full_table = cudf::concatenate(std::vector({*table1, *table2})); - - auto filepath = temp_env->get_temp_filepath("ChunkedNoNullable.parquet"); - - cudf::io::table_input_metadata metadata(*table1); - - // In the absence of prescribed per-column nullability in metadata, the writer assumes the worst - // and considers all columns nullable. However cudf::concatenate will not force nulls in case no - // columns are nullable. To get the expected result, we tell the writer the nullability of all - // columns in advance. 
- for (auto& col_meta : metadata.column_metadata) { - col_meta.set_nullability(false); - } - - cudf::io::chunked_parquet_writer_options args = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}) - .metadata(&metadata); - cudf::io::parquet_chunked_writer(args).write(*table1).write(*table2); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); -} - -TEST_F(ParquetChunkedWriterTest, ForcedNullabilityList) -{ - srand(31337); - - auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); - auto valids2 = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; }); - - using lcw = cudf::test::lists_column_wrapper; - - // COL0 ==================== - // [1, 2, 3] - // [] - // [4, 5] - // NULL - lcw col00{{{1, 2, 3}, {}, {4, 5}, {}}, valids2}; - - // [7] - // [] - // [8, 9, 10, 11] - // NULL - lcw col01{{{7}, {}, {8, 9, 10, 11}, {}}, valids2}; - - // COL1 (non-nested columns to test proper schema construction) - size_t num_rows = static_cast(col00).size(); - auto seq_col0 = random_values(num_rows); - auto seq_col1 = random_values(num_rows); - - column_wrapper col10{seq_col0.begin(), seq_col0.end(), valids}; - column_wrapper col11{seq_col1.begin(), seq_col1.end(), valids2}; - - auto table1 = table_view({col00, col10}); - auto table2 = table_view({col01, col11}); - - auto full_table = cudf::concatenate(std::vector({table1, table2})); - - cudf::io::table_input_metadata metadata(table1); - metadata.column_metadata[0].set_nullability(true); // List is nullable at first (root) level - metadata.column_metadata[0].child(1).set_nullability( - false); // non-nullable at second (leaf) level - metadata.column_metadata[1].set_nullability(true); - - auto filepath = temp_env->get_temp_filepath("ChunkedListNullable.parquet"); - - cudf::io::chunked_parquet_writer_options args = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}) - .metadata(&metadata); - cudf::io::parquet_chunked_writer(args).write(table1).write(table2); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); -} - -TEST_F(ParquetChunkedWriterTest, ForcedNullabilityStruct) -{ - // Struct, - // age:int - // > (nullable) - // > (non-nullable) - - // Table 1: is_human and struct_2 are non-nullable and should stay that way when read back. 
- auto weight_1 = cudf::test::fixed_width_column_wrapper{{57.5, 51.1, 15.3}}; - auto ages_1 = cudf::test::fixed_width_column_wrapper{{30, 27, 5}}; - auto struct_1_1 = cudf::test::structs_column_wrapper{weight_1, ages_1}; - auto is_human_1 = cudf::test::fixed_width_column_wrapper{{true, true, false}}; - auto struct_2_1 = cudf::test::structs_column_wrapper{{is_human_1, struct_1_1}}; - auto table_1 = cudf::table_view({struct_2_1}); - - auto weight_2 = cudf::test::fixed_width_column_wrapper{{1.1, -1.0, -1.0}}; - auto ages_2 = cudf::test::fixed_width_column_wrapper{{31, 351, 351}, {1, 1, 0}}; - auto struct_1_2 = cudf::test::structs_column_wrapper{{weight_2, ages_2}, {1, 0, 1}}; - auto is_human_2 = cudf::test::fixed_width_column_wrapper{{false, false, false}}; - auto struct_2_2 = cudf::test::structs_column_wrapper{{is_human_2, struct_1_2}}; - auto table_2 = cudf::table_view({struct_2_2}); - - auto full_table = cudf::concatenate(std::vector({table_1, table_2})); - - cudf::io::table_input_metadata expected_metadata(table_1); - expected_metadata.column_metadata[0].set_name("being").set_nullability(false); - expected_metadata.column_metadata[0].child(0).set_name("human?").set_nullability(false); - expected_metadata.column_metadata[0].child(1).set_name("particulars"); - expected_metadata.column_metadata[0].child(1).child(0).set_name("weight"); - expected_metadata.column_metadata[0].child(1).child(1).set_name("age"); - - auto filepath = temp_env->get_temp_filepath("ChunkedNullableStruct.parquet"); - cudf::io::chunked_parquet_writer_options args = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); - args.set_metadata(&expected_metadata); - cudf::io::parquet_chunked_writer(args).write(table_1).write(table_2); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); -} - -TEST_F(ParquetChunkedWriterTest, ReadRowGroups) -{ - srand(31337); - auto table1 = create_random_fixed_table(5, 5, true); - auto table2 = create_random_fixed_table(5, 5, true); - - auto full_table = cudf::concatenate(std::vector({*table2, *table1, *table2})); - - auto filepath = temp_env->get_temp_filepath("ChunkedRowGroups.parquet"); - cudf::io::chunked_parquet_writer_options args = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); - { - cudf::io::parquet_chunked_writer(args).write(*table1).write(*table2); - } - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) - .row_groups({{1, 0, 1}}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); -} - -TEST_F(ParquetChunkedWriterTest, ReadRowGroupsError) -{ - srand(31337); - auto table1 = create_random_fixed_table(5, 5, true); - - auto filepath = temp_env->get_temp_filepath("ChunkedRowGroupsError.parquet"); - cudf::io::chunked_parquet_writer_options args = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); - cudf::io::parquet_chunked_writer(args).write(*table1); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}).row_groups({{0, 1}}); - EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error); - read_opts.set_row_groups({{-1}}); - 
EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error); - read_opts.set_row_groups({{0}, {0}}); - EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error); -} - -TEST_F(ParquetWriterTest, DecimalWrite) -{ - constexpr cudf::size_type num_rows = 500; - auto seq_col0 = random_values(num_rows); - auto seq_col1 = random_values(num_rows); - - auto valids = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; }); - - auto col0 = cudf::test::fixed_point_column_wrapper{ - seq_col0.begin(), seq_col0.end(), valids, numeric::scale_type{5}}; - auto col1 = cudf::test::fixed_point_column_wrapper{ - seq_col1.begin(), seq_col1.end(), valids, numeric::scale_type{-9}}; - - auto table = table_view({col0, col1}); - - auto filepath = temp_env->get_temp_filepath("DecimalWrite.parquet"); - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, table); - - cudf::io::table_input_metadata expected_metadata(table); - - // verify failure if too small a precision is given - expected_metadata.column_metadata[0].set_decimal_precision(7); - expected_metadata.column_metadata[1].set_decimal_precision(1); - args.set_metadata(&expected_metadata); - EXPECT_THROW(cudf::io::write_parquet(args), cudf::logic_error); - - // verify success if equal precision is given - expected_metadata.column_metadata[0].set_decimal_precision(7); - expected_metadata.column_metadata[1].set_decimal_precision(9); - args.set_metadata(&expected_metadata); - cudf::io::write_parquet(args); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, table); -} - -TYPED_TEST(ParquetChunkedWriterNumericTypeTest, UnalignedSize) -{ - // write out two 31 row tables and make sure they get - // read back with all their validity bits in the right place - - using T = TypeParam; - - int num_els = 31; - std::vector> cols; - - bool mask[] = {false, true, true, true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, true, true, true, - - true, true, true, true, true, true, true, true, true}; - T c1a[num_els]; - std::fill(c1a, c1a + num_els, static_cast(5)); - T c1b[num_els]; - std::fill(c1b, c1b + num_els, static_cast(6)); - column_wrapper c1a_w(c1a, c1a + num_els, mask); - column_wrapper c1b_w(c1b, c1b + num_els, mask); - cols.push_back(c1a_w.release()); - cols.push_back(c1b_w.release()); - cudf::table tbl1(std::move(cols)); - - T c2a[num_els]; - std::fill(c2a, c2a + num_els, static_cast(8)); - T c2b[num_els]; - std::fill(c2b, c2b + num_els, static_cast(9)); - column_wrapper c2a_w(c2a, c2a + num_els, mask); - column_wrapper c2b_w(c2b, c2b + num_els, mask); - cols.push_back(c2a_w.release()); - cols.push_back(c2b_w.release()); - cudf::table tbl2(std::move(cols)); - - auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); - - auto filepath = temp_env->get_temp_filepath("ChunkedUnalignedSize.parquet"); - cudf::io::chunked_parquet_writer_options args = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); - cudf::io::parquet_chunked_writer(args).write(tbl1).write(tbl2); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); -} - 
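The UnalignedSize test above and the UnalignedSize2 test below pick 31- and 33-row chunks because cudf packs validity bits into 32-bit bitmask words, so a chunk length that is not a multiple of 32 forces the reader to splice the next chunk's validity bits mid-word when the chunks are concatenated. A minimal standalone sketch of that word-boundary arithmetic, assuming only the public cudf::bitmask_allocation_size_bytes helper (this snippet is illustrative and not part of the patch):

// Illustrative only (not part of this change): shows why a 31-row chunk lands
// the chunk boundary in the middle of a 32-bit validity word.
#include <cudf/null_mask.hpp>
#include <cudf/types.hpp>
#include <iostream>

int main()
{
  cudf::size_type const rows_per_chunk = 31;  // deliberately not a multiple of 32

  // Bytes cudf would allocate for one chunk's null mask (padded, per the
  // public helper in <cudf/null_mask.hpp>).
  std::size_t const mask_bytes = cudf::bitmask_allocation_size_bytes(rows_per_chunk);

  // After two chunks are written back-to-back, the second chunk's first
  // validity bit sits at this offset inside a 32-bit bitmask word.
  int const start_bit = rows_per_chunk % 32;

  std::cout << "null mask allocation for one chunk: " << mask_bytes << " bytes\n";
  std::cout << "second chunk's validity bits start at bit " << start_bit << "\n";
  return 0;
}

Any chunk length that is not a multiple of 32 (e.g. the 33-row tables in the next test) exercises the same splice.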
-TYPED_TEST(ParquetChunkedWriterNumericTypeTest, UnalignedSize2) -{ - // write out two 33 row tables and make sure they get - // read back with all their validity bits in the right place - - using T = TypeParam; - - int num_els = 33; - std::vector> cols; - - bool mask[] = {false, true, true, true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, true, true, true}; - - T c1a[num_els]; - std::fill(c1a, c1a + num_els, static_cast(5)); - T c1b[num_els]; - std::fill(c1b, c1b + num_els, static_cast(6)); - column_wrapper c1a_w(c1a, c1a + num_els, mask); - column_wrapper c1b_w(c1b, c1b + num_els, mask); - cols.push_back(c1a_w.release()); - cols.push_back(c1b_w.release()); - cudf::table tbl1(std::move(cols)); - - T c2a[num_els]; - std::fill(c2a, c2a + num_els, static_cast(8)); - T c2b[num_els]; - std::fill(c2b, c2b + num_els, static_cast(9)); - column_wrapper c2a_w(c2a, c2a + num_els, mask); - column_wrapper c2b_w(c2b, c2b + num_els, mask); - cols.push_back(c2a_w.release()); - cols.push_back(c2b_w.release()); - cudf::table tbl2(std::move(cols)); - - auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); - - auto filepath = temp_env->get_temp_filepath("ChunkedUnalignedSize2.parquet"); - cudf::io::chunked_parquet_writer_options args = - cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); - cudf::io::parquet_chunked_writer(args).write(tbl1).write(tbl2); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); -} - -// custom mem mapped data sink that supports device writes -template -class custom_test_memmap_sink : public cudf::io::data_sink { - public: - explicit custom_test_memmap_sink(std::vector* mm_writer_buf) - { - mm_writer = cudf::io::data_sink::create(mm_writer_buf); - } - - virtual ~custom_test_memmap_sink() { mm_writer->flush(); } - - void host_write(void const* data, size_t size) override { mm_writer->host_write(data, size); } - - [[nodiscard]] bool supports_device_write() const override { return supports_device_writes; } - - void device_write(void const* gpu_data, size_t size, rmm::cuda_stream_view stream) override - { - this->device_write_async(gpu_data, size, stream).get(); - } - - std::future device_write_async(void const* gpu_data, - size_t size, - rmm::cuda_stream_view stream) override - { - return std::async(std::launch::deferred, [=] { - char* ptr = nullptr; - CUDF_CUDA_TRY(cudaMallocHost(&ptr, size)); - CUDF_CUDA_TRY(cudaMemcpyAsync(ptr, gpu_data, size, cudaMemcpyDeviceToHost, stream.value())); - stream.synchronize(); - mm_writer->host_write(ptr, size); - CUDF_CUDA_TRY(cudaFreeHost(ptr)); - }); - } - - void flush() override { mm_writer->flush(); } - - size_t bytes_written() override { return mm_writer->bytes_written(); } - - private: - std::unique_ptr mm_writer; -}; - -TEST_F(ParquetWriterStressTest, LargeTableWeakCompression) -{ - std::vector mm_buf; - mm_buf.reserve(4 * 1024 * 1024 * 16); - custom_test_memmap_sink custom_sink(&mm_buf); - - // exercises multiple rowgroups - srand(31337); - auto expected = create_random_fixed_table(16, 4 * 1024 * 1024, false); - - // write out using the custom sink (which uses device writes) - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); - 
cudf::io::write_parquet(args); - - cudf::io::parquet_reader_options custom_args = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()}); - auto custom_tbl = cudf::io::read_parquet(custom_args); - CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); -} - -TEST_F(ParquetWriterStressTest, LargeTableGoodCompression) -{ - std::vector mm_buf; - mm_buf.reserve(4 * 1024 * 1024 * 16); - custom_test_memmap_sink custom_sink(&mm_buf); - - // exercises multiple rowgroups - srand(31337); - auto expected = create_compressible_fixed_table(16, 4 * 1024 * 1024, 128 * 1024, false); - - // write out using the custom sink (which uses device writes) - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); - cudf::io::write_parquet(args); - - cudf::io::parquet_reader_options custom_args = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()}); - auto custom_tbl = cudf::io::read_parquet(custom_args); - CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); -} - -TEST_F(ParquetWriterStressTest, LargeTableWithValids) -{ - std::vector mm_buf; - mm_buf.reserve(4 * 1024 * 1024 * 16); - custom_test_memmap_sink custom_sink(&mm_buf); - - // exercises multiple rowgroups - srand(31337); - auto expected = create_compressible_fixed_table(16, 4 * 1024 * 1024, 6, true); - - // write out using the custom sink (which uses device writes) - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); - cudf::io::write_parquet(args); - - cudf::io::parquet_reader_options custom_args = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()}); - auto custom_tbl = cudf::io::read_parquet(custom_args); - CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); -} - -TEST_F(ParquetWriterStressTest, DeviceWriteLargeTableWeakCompression) -{ - std::vector mm_buf; - mm_buf.reserve(4 * 1024 * 1024 * 16); - custom_test_memmap_sink custom_sink(&mm_buf); - - // exercises multiple rowgroups - srand(31337); - auto expected = create_random_fixed_table(16, 4 * 1024 * 1024, false); - - // write out using the custom sink (which uses device writes) - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); - cudf::io::write_parquet(args); - - cudf::io::parquet_reader_options custom_args = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()}); - auto custom_tbl = cudf::io::read_parquet(custom_args); - CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); -} - -TEST_F(ParquetWriterStressTest, DeviceWriteLargeTableGoodCompression) -{ - std::vector mm_buf; - mm_buf.reserve(4 * 1024 * 1024 * 16); - custom_test_memmap_sink custom_sink(&mm_buf); - - // exercises multiple rowgroups - srand(31337); - auto expected = create_compressible_fixed_table(16, 4 * 1024 * 1024, 128 * 1024, false); - - // write out using the custom sink (which uses device writes) - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); - cudf::io::write_parquet(args); - - cudf::io::parquet_reader_options custom_args = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()}); - auto custom_tbl = 
cudf::io::read_parquet(custom_args); - CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); -} - -TEST_F(ParquetWriterStressTest, DeviceWriteLargeTableWithValids) -{ - std::vector mm_buf; - mm_buf.reserve(4 * 1024 * 1024 * 16); - custom_test_memmap_sink custom_sink(&mm_buf); - - // exercises multiple rowgroups - srand(31337); - auto expected = create_compressible_fixed_table(16, 4 * 1024 * 1024, 6, true); - - // write out using the custom sink (which uses device writes) - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); - cudf::io::write_parquet(args); - - cudf::io::parquet_reader_options custom_args = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()}); - auto custom_tbl = cudf::io::read_parquet(custom_args); - CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); -} - -TEST_F(ParquetReaderTest, UserBounds) -{ - // trying to read more rows than there are should result in - // receiving the properly capped # of rows - { - srand(31337); - auto expected = create_random_fixed_table(4, 4, false); - - auto filepath = temp_env->get_temp_filepath("TooManyRows.parquet"); - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected); - cudf::io::write_parquet(args); - - // attempt to read more rows than there actually are - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}).num_rows(16); - auto result = cudf::io::read_parquet(read_opts); - - // we should only get back 4 rows - EXPECT_EQ(result.tbl->view().column(0).size(), 4); - } - - // trying to read past the end of the # of actual rows should result - // in empty columns. - { - srand(31337); - auto expected = create_random_fixed_table(4, 4, false); - - auto filepath = temp_env->get_temp_filepath("PastBounds.parquet"); - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected); - cudf::io::write_parquet(args); - - // attempt to read more rows than there actually are - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}).skip_rows(4); - auto result = cudf::io::read_parquet(read_opts); - - // we should get empty columns back - EXPECT_EQ(result.tbl->view().num_columns(), 4); - EXPECT_EQ(result.tbl->view().column(0).size(), 0); - } - - // trying to read 0 rows should result in reading the whole file - // at the moment we get back 4. when that bug gets fixed, this - // test can be flipped. - { - srand(31337); - auto expected = create_random_fixed_table(4, 4, false); - - auto filepath = temp_env->get_temp_filepath("ZeroRows.parquet"); - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected); - cudf::io::write_parquet(args); - - // attempt to read more rows than there actually are - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}).num_rows(0); - auto result = cudf::io::read_parquet(read_opts); - - EXPECT_EQ(result.tbl->view().num_columns(), 4); - EXPECT_EQ(result.tbl->view().column(0).size(), 0); - } - - // trying to read 0 rows past the end of the # of actual rows should result - // in empty columns. 
- { - srand(31337); - auto expected = create_random_fixed_table(4, 4, false); - - auto filepath = temp_env->get_temp_filepath("ZeroRowsPastBounds.parquet"); - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected); - cudf::io::write_parquet(args); - - // attempt to read more rows than there actually are - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) - .skip_rows(4) - .num_rows(0); - auto result = cudf::io::read_parquet(read_opts); - - // we should get empty columns back - EXPECT_EQ(result.tbl->view().num_columns(), 4); - EXPECT_EQ(result.tbl->view().column(0).size(), 0); - } -} - -TEST_F(ParquetReaderTest, UserBoundsWithNulls) -{ - // clang-format off - cudf::test::fixed_width_column_wrapper col{{1,1,1,1,1,1,1,1, 2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4, 5,5,5,5,5,5,5,5, 6,6,6,6,6,6,6,6, 7,7,7,7,7,7,7,7, 8,8,8,8,8,8,8,8} - ,{1,1,1,0,0,0,1,1, 1,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,0, 1,1,1,1,1,1,0,0, 1,0,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,0}}; - // clang-format on - cudf::table_view tbl({col}); - auto filepath = temp_env->get_temp_filepath("UserBoundsWithNulls.parquet"); - cudf::io::parquet_writer_options out_args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl); - cudf::io::write_parquet(out_args); - - // skip_rows / num_rows - // clang-format off - std::vector> params{ {-1, -1}, {1, 3}, {3, -1}, - {31, -1}, {32, -1}, {33, -1}, - {31, 5}, {32, 5}, {33, 5}, - {-1, 7}, {-1, 31}, {-1, 32}, {-1, 33}, - {62, -1}, {63, -1}, - {62, 2}, {63, 1}}; - // clang-format on - for (auto p : params) { - cudf::io::parquet_reader_options read_args = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - if (p.first >= 0) { read_args.set_skip_rows(p.first); } - if (p.second >= 0) { read_args.set_num_rows(p.second); } - auto result = cudf::io::read_parquet(read_args); - - p.first = p.first < 0 ? 0 : p.first; - p.second = p.second < 0 ? 
static_cast(col).size() - p.first : p.second; - std::vector slice_indices{p.first, p.first + p.second}; - auto expected = cudf::slice(col, slice_indices); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(0), expected[0]); - } -} - -TEST_F(ParquetReaderTest, UserBoundsWithNullsMixedTypes) -{ - constexpr int num_rows = 32 * 1024; - - std::mt19937 gen(6542); - std::bernoulli_distribution bn(0.7f); - auto valids = - cudf::detail::make_counting_transform_iterator(0, [&](int index) { return bn(gen); }); - auto values = thrust::make_counting_iterator(0); - - // int64 - cudf::test::fixed_width_column_wrapper c0(values, values + num_rows, valids); - - // list - constexpr int floats_per_row = 4; - auto c1_offset_iter = cudf::detail::make_counting_transform_iterator( - 0, [floats_per_row](cudf::size_type idx) { return idx * floats_per_row; }); - cudf::test::fixed_width_column_wrapper c1_offsets( - c1_offset_iter, c1_offset_iter + num_rows + 1); - cudf::test::fixed_width_column_wrapper c1_floats( - values, values + (num_rows * floats_per_row), valids); - auto _c1 = cudf::make_lists_column(num_rows, - c1_offsets.release(), - c1_floats.release(), - cudf::UNKNOWN_NULL_COUNT, - cudf::test::detail::make_null_mask(valids, valids + num_rows)); - auto c1 = cudf::purge_nonempty_nulls(static_cast(*_c1)); - - // list> - auto c2 = make_parquet_list_list_col(0, num_rows, 5, 8, true); - - // struct, int, float> - std::vector strings{ - "abc", "x", "bananas", "gpu", "minty", "backspace", "", "cayenne", "turbine", "soft"}; - std::uniform_int_distribution uni(0, strings.size() - 1); - auto string_iter = cudf::detail::make_counting_transform_iterator( - 0, [&](cudf::size_type idx) { return strings[uni(gen)]; }); - constexpr int string_per_row = 3; - constexpr int num_string_rows = num_rows * string_per_row; - cudf::test::strings_column_wrapper string_col{string_iter, string_iter + num_string_rows}; - auto offset_iter = cudf::detail::make_counting_transform_iterator( - 0, [string_per_row](cudf::size_type idx) { return idx * string_per_row; }); - cudf::test::fixed_width_column_wrapper offsets(offset_iter, - offset_iter + num_rows + 1); - auto _c3_list = - cudf::make_lists_column(num_rows, - offsets.release(), - string_col.release(), - cudf::UNKNOWN_NULL_COUNT, - cudf::test::detail::make_null_mask(valids, valids + num_rows)); - auto c3_list = cudf::purge_nonempty_nulls(static_cast(*_c3_list)); - cudf::test::fixed_width_column_wrapper c3_ints(values, values + num_rows, valids); - cudf::test::fixed_width_column_wrapper c3_floats(values, values + num_rows, valids); - std::vector> c3_children; - c3_children.push_back(std::move(c3_list)); - c3_children.push_back(c3_ints.release()); - c3_children.push_back(c3_floats.release()); - cudf::test::structs_column_wrapper _c3(std::move(c3_children)); - auto c3 = cudf::purge_nonempty_nulls(static_cast(_c3)); - - // write it out - cudf::table_view tbl({c0, *c1, *c2, *c3}); - auto filepath = temp_env->get_temp_filepath("UserBoundsWithNullsMixedTypes.parquet"); - cudf::io::parquet_writer_options out_args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl); - cudf::io::write_parquet(out_args); - - // read it back - std::vector> params{ - {-1, -1}, {0, num_rows}, {1, num_rows - 1}, {num_rows - 1, 1}, {517, 22000}}; - for (auto p : params) { - cudf::io::parquet_reader_options read_args = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - if (p.first >= 0) { read_args.set_skip_rows(p.first); } - if (p.second >= 0) { 
read_args.set_num_rows(p.second); } - auto result = cudf::io::read_parquet(read_args); - - p.first = p.first < 0 ? 0 : p.first; - p.second = p.second < 0 ? num_rows - p.first : p.second; - std::vector slice_indices{p.first, p.first + p.second}; - auto expected = cudf::slice(tbl, slice_indices); - - CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, expected[0]); - } -} - -TEST_F(ParquetReaderTest, UserBoundsWithNullsLarge) -{ - constexpr int num_rows = 30 * 1000000; - - std::mt19937 gen(6747); - std::bernoulli_distribution bn(0.7f); - auto valids = - cudf::detail::make_counting_transform_iterator(0, [&](int index) { return bn(gen); }); - auto values = thrust::make_counting_iterator(0); - - cudf::test::fixed_width_column_wrapper col(values, values + num_rows, valids); - - // this file will have row groups of 1,000,000 each - cudf::table_view tbl({col}); - auto filepath = temp_env->get_temp_filepath("UserBoundsWithNullsLarge.parquet"); - cudf::io::parquet_writer_options out_args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl); - cudf::io::write_parquet(out_args); - - // skip_rows / num_rows - // clang-format off - std::vector> params{ {-1, -1}, {31, -1}, {32, -1}, {33, -1}, {1613470, -1}, {1999999, -1}, - {31, 1}, {32, 1}, {33, 1}, - // deliberately span some row group boundaries - {999000, 1001}, {999000, 2000}, {2999999, 2}, {13999997, -1}, - {16785678, 3}, {22996176, 31}, - {24001231, 17}, {29000001, 989999}, {29999999, 1} }; - // clang-format on - for (auto p : params) { - cudf::io::parquet_reader_options read_args = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - if (p.first >= 0) { read_args.set_skip_rows(p.first); } - if (p.second >= 0) { read_args.set_num_rows(p.second); } - auto result = cudf::io::read_parquet(read_args); - - p.first = p.first < 0 ? 0 : p.first; - p.second = p.second < 0 ? static_cast(col).size() - p.first : p.second; - std::vector slice_indices{p.first, p.first + p.second}; - auto expected = cudf::slice(col, slice_indices); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(0), expected[0]); - } -} - -TEST_F(ParquetReaderTest, ListUserBoundsWithNullsLarge) -{ - constexpr int num_rows = 5 * 1000000; - auto colp = make_parquet_list_list_col(0, num_rows, 5, 8, true); - cudf::column_view col = *colp; - - // this file will have row groups of 1,000,000 each - cudf::table_view tbl({col}); - auto filepath = temp_env->get_temp_filepath("ListUserBoundsWithNullsLarge.parquet"); - cudf::io::parquet_writer_options out_args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl); - cudf::io::write_parquet(out_args); - - // skip_rows / num_rows - // clang-format off - std::vector> params{ {-1, -1}, {31, -1}, {32, -1}, {33, -1}, {161470, -1}, {4499997, -1}, - {31, 1}, {32, 1}, {33, 1}, - // deliberately span some row group boundaries - {999000, 1001}, {999000, 2000}, {2999999, 2}, - {1678567, 3}, {4299676, 31}, - {4001231, 17}, {1900000, 989999}, {4999999, 1} }; - // clang-format on - for (auto p : params) { - cudf::io::parquet_reader_options read_args = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - if (p.first >= 0) { read_args.set_skip_rows(p.first); } - if (p.second >= 0) { read_args.set_num_rows(p.second); } - auto result = cudf::io::read_parquet(read_args); - - p.first = p.first < 0 ? 0 : p.first; - p.second = p.second < 0 ? 
static_cast(col).size() - p.first : p.second; - std::vector slice_indices{p.first, p.first + p.second}; - auto expected = cudf::slice(col, slice_indices); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(0), expected[0]); - } -} - -TEST_F(ParquetReaderTest, ReorderedColumns) -{ - { - auto a = cudf::test::strings_column_wrapper{{"a", "", "c"}, {true, false, true}}; - auto b = cudf::test::fixed_width_column_wrapper{1, 2, 3}; - - cudf::table_view tbl{{a, b}}; - auto filepath = temp_env->get_temp_filepath("ReorderedColumns.parquet"); - cudf::io::table_input_metadata md(tbl); - md.column_metadata[0].set_name("a"); - md.column_metadata[1].set_name("b"); - cudf::io::parquet_writer_options opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl).metadata(&md); - cudf::io::write_parquet(opts); - - // read them out of order - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) - .columns({"b", "a"}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), b); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), a); - } - - { - auto a = cudf::test::fixed_width_column_wrapper{1, 2, 3}; - auto b = cudf::test::strings_column_wrapper{{"a", "", "c"}, {true, false, true}}; - - cudf::table_view tbl{{a, b}}; - auto filepath = temp_env->get_temp_filepath("ReorderedColumns2.parquet"); - cudf::io::table_input_metadata md(tbl); - md.column_metadata[0].set_name("a"); - md.column_metadata[1].set_name("b"); - cudf::io::parquet_writer_options opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl).metadata(&md); - cudf::io::write_parquet(opts); - - // read them out of order - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) - .columns({"b", "a"}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), b); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), a); - } - - auto a = cudf::test::fixed_width_column_wrapper{1, 2, 3, 10, 20, 30}; - auto b = cudf::test::strings_column_wrapper{{"a", "", "c", "cats", "dogs", "owls"}, - {true, false, true, true, false, true}}; - auto c = cudf::test::fixed_width_column_wrapper{{15, 16, 17, 25, 26, 32}, - {false, true, true, true, true, false}}; - auto d = cudf::test::strings_column_wrapper{"ducks", "sheep", "cows", "fish", "birds", "ants"}; - - cudf::table_view tbl{{a, b, c, d}}; - auto filepath = temp_env->get_temp_filepath("ReorderedColumns3.parquet"); - cudf::io::table_input_metadata md(tbl); - md.column_metadata[0].set_name("a"); - md.column_metadata[1].set_name("b"); - md.column_metadata[2].set_name("c"); - md.column_metadata[3].set_name("d"); - cudf::io::parquet_writer_options opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl).metadata(&md); - cudf::io::write_parquet(opts); - - { - // read them out of order - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) - .columns({"d", "a", "b", "c"}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), d); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), a); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(2), b); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(3), c); - } - - { - // 
read them out of order - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) - .columns({"c", "d", "a", "b"}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), c); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), d); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(2), a); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(3), b); - } - - { - // read them out of order - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) - .columns({"d", "c", "b", "a"}); - auto result = cudf::io::read_parquet(read_opts); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), d); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), c); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(2), b); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(3), a); - } -} - -TEST_F(ParquetReaderTest, SelectNestedColumn) -{ - // Struct>, - // flats:List> - // > - // > - - auto weights_col = cudf::test::fixed_width_column_wrapper{1.1, 2.4, 5.3, 8.0, 9.6, 6.9}; - - auto ages_col = - cudf::test::fixed_width_column_wrapper{{48, 27, 25, 31, 351, 351}, {1, 1, 1, 1, 1, 0}}; - - auto struct_1 = cudf::test::structs_column_wrapper{{weights_col, ages_col}, {1, 1, 1, 1, 0, 1}}; - - auto is_human_col = cudf::test::fixed_width_column_wrapper{ - {true, true, false, false, false, false}, {1, 1, 0, 1, 1, 0}}; - - auto struct_2 = - cudf::test::structs_column_wrapper{{is_human_col, struct_1}, {0, 1, 1, 1, 1, 1}}.release(); - - auto input = table_view({*struct_2}); - - cudf::io::table_input_metadata input_metadata(input); - input_metadata.column_metadata[0].set_name("being"); - input_metadata.column_metadata[0].child(0).set_name("human?"); - input_metadata.column_metadata[0].child(1).set_name("particulars"); - input_metadata.column_metadata[0].child(1).child(0).set_name("weight"); - input_metadata.column_metadata[0].child(1).child(1).set_name("age"); - - auto filepath = temp_env->get_temp_filepath("SelectNestedColumn.parquet"); - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, input) - .metadata(&input_metadata); - cudf::io::write_parquet(args); - - { // Test selecting a single leaf from the table - cudf::io::parquet_reader_options read_args = - cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath)) - .columns({"being.particulars.age"}); - const auto result = cudf::io::read_parquet(read_args); - - auto expect_ages_col = cudf::test::fixed_width_column_wrapper{ - {48, 27, 25, 31, 351, 351}, {1, 1, 1, 1, 1, 0}}; - auto expect_s_1 = cudf::test::structs_column_wrapper{{expect_ages_col}, {1, 1, 1, 1, 0, 1}}; - auto expect_s_2 = - cudf::test::structs_column_wrapper{{expect_s_1}, {0, 1, 1, 1, 1, 1}}.release(); - auto expected = table_view({*expect_s_2}); - - cudf::io::table_input_metadata expected_metadata(expected); - expected_metadata.column_metadata[0].set_name("being"); - expected_metadata.column_metadata[0].child(0).set_name("particulars"); - expected_metadata.column_metadata[0].child(0).child(0).set_name("age"); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); - } - - { // Test selecting a non-leaf and expecting all hierarchy from that node onwards - cudf::io::parquet_reader_options read_args = - 
cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath)) - .columns({"being.particulars"}); - const auto result = cudf::io::read_parquet(read_args); - - auto expected_weights_col = - cudf::test::fixed_width_column_wrapper{1.1, 2.4, 5.3, 8.0, 9.6, 6.9}; - - auto expected_ages_col = cudf::test::fixed_width_column_wrapper{ - {48, 27, 25, 31, 351, 351}, {1, 1, 1, 1, 1, 0}}; - - auto expected_s_1 = cudf::test::structs_column_wrapper{ - {expected_weights_col, expected_ages_col}, {1, 1, 1, 1, 0, 1}}; - - auto expect_s_2 = - cudf::test::structs_column_wrapper{{expected_s_1}, {0, 1, 1, 1, 1, 1}}.release(); - auto expected = table_view({*expect_s_2}); - - cudf::io::table_input_metadata expected_metadata(expected); - expected_metadata.column_metadata[0].set_name("being"); - expected_metadata.column_metadata[0].child(0).set_name("particulars"); - expected_metadata.column_metadata[0].child(0).child(0).set_name("weight"); - expected_metadata.column_metadata[0].child(0).child(1).set_name("age"); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); - } - - { // Test selecting struct children out of order - cudf::io::parquet_reader_options read_args = - cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath)) - .columns({"being.particulars.age", "being.particulars.weight", "being.human?"}); - const auto result = cudf::io::read_parquet(read_args); - - auto expected_weights_col = - cudf::test::fixed_width_column_wrapper{1.1, 2.4, 5.3, 8.0, 9.6, 6.9}; - - auto expected_ages_col = cudf::test::fixed_width_column_wrapper{ - {48, 27, 25, 31, 351, 351}, {1, 1, 1, 1, 1, 0}}; - - auto expected_is_human_col = cudf::test::fixed_width_column_wrapper{ - {true, true, false, false, false, false}, {1, 1, 0, 1, 1, 0}}; - - auto expect_s_1 = cudf::test::structs_column_wrapper{{expected_ages_col, expected_weights_col}, - {1, 1, 1, 1, 0, 1}}; - - auto expect_s_2 = - cudf::test::structs_column_wrapper{{expect_s_1, expected_is_human_col}, {0, 1, 1, 1, 1, 1}} - .release(); - - auto expected = table_view({*expect_s_2}); - - cudf::io::table_input_metadata expected_metadata(expected); - expected_metadata.column_metadata[0].set_name("being"); - expected_metadata.column_metadata[0].child(0).set_name("particulars"); - expected_metadata.column_metadata[0].child(0).child(0).set_name("age"); - expected_metadata.column_metadata[0].child(0).child(1).set_name("weight"); - expected_metadata.column_metadata[0].child(1).set_name("human?"); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); - cudf::test::expect_metadata_equal(expected_metadata, result.metadata); - } -} - -TEST_F(ParquetReaderTest, DecimalRead) -{ - { - /* We could add a dataset to include this file, but we don't want tests in cudf to have data. 
- This test is a temporary test until python gains the ability to write decimal, so we're - embedding - a parquet file directly into the code here to prevent issues with finding the file */ - const unsigned char decimals_parquet[] = { - 0x50, 0x41, 0x52, 0x31, 0x15, 0x00, 0x15, 0xb0, 0x03, 0x15, 0xb8, 0x03, 0x2c, 0x15, 0x6a, - 0x15, 0x00, 0x15, 0x06, 0x15, 0x08, 0x1c, 0x36, 0x02, 0x28, 0x04, 0x7f, 0x96, 0x98, 0x00, - 0x18, 0x04, 0x81, 0x69, 0x67, 0xff, 0x00, 0x00, 0x00, 0xd8, 0x01, 0xf0, 0xd7, 0x04, 0x00, - 0x00, 0x00, 0x64, 0x01, 0x03, 0x06, 0x68, 0x12, 0xdc, 0xff, 0xbd, 0x18, 0xfd, 0xff, 0x64, - 0x13, 0x80, 0x00, 0xb3, 0x5d, 0x62, 0x00, 0x90, 0x35, 0xa9, 0xff, 0xa2, 0xde, 0xe3, 0xff, - 0xe9, 0xbf, 0x96, 0xff, 0x1f, 0x8a, 0x98, 0xff, 0xb1, 0x50, 0x34, 0x00, 0x88, 0x24, 0x59, - 0x00, 0x2a, 0x33, 0xbe, 0xff, 0xd5, 0x16, 0xbc, 0xff, 0x13, 0x50, 0x8d, 0xff, 0xcb, 0x63, - 0x2d, 0x00, 0x80, 0x8f, 0xbe, 0xff, 0x82, 0x40, 0x10, 0x00, 0x84, 0x68, 0x70, 0xff, 0x9b, - 0x69, 0x78, 0x00, 0x14, 0x6c, 0x10, 0x00, 0x50, 0xd9, 0xe1, 0xff, 0xaa, 0xcd, 0x6a, 0x00, - 0xcf, 0xb1, 0x28, 0x00, 0x77, 0x57, 0x8d, 0x00, 0xee, 0x05, 0x79, 0x00, 0xf0, 0x15, 0xeb, - 0xff, 0x02, 0xe2, 0x06, 0x00, 0x87, 0x43, 0x86, 0x00, 0xf8, 0x2d, 0x2e, 0x00, 0xee, 0x2e, - 0x98, 0xff, 0x39, 0xcb, 0x4d, 0x00, 0x1e, 0x6b, 0xea, 0xff, 0x80, 0x8e, 0x6c, 0xff, 0x97, - 0x25, 0x26, 0x00, 0x4d, 0x0d, 0x0a, 0x00, 0xca, 0x64, 0x7f, 0x00, 0xf4, 0xbe, 0xa1, 0xff, - 0xe2, 0x12, 0x6c, 0xff, 0xbd, 0x77, 0xae, 0xff, 0xf9, 0x4b, 0x36, 0x00, 0xb0, 0xe3, 0x79, - 0xff, 0xa2, 0x2a, 0x29, 0x00, 0xcd, 0x06, 0xbc, 0xff, 0x2d, 0xa3, 0x7e, 0x00, 0xa9, 0x08, - 0xa1, 0xff, 0xbf, 0x81, 0xd0, 0xff, 0x4f, 0x03, 0x73, 0x00, 0xb0, 0x99, 0x0c, 0x00, 0xbd, - 0x6f, 0xf8, 0xff, 0x6b, 0x02, 0x05, 0x00, 0xc1, 0xe1, 0xba, 0xff, 0x81, 0x69, 0x67, 0xff, - 0x7f, 0x96, 0x98, 0x00, 0x15, 0x00, 0x15, 0xd0, 0x06, 0x15, 0xda, 0x06, 0x2c, 0x15, 0x6a, - 0x15, 0x00, 0x15, 0x06, 0x15, 0x08, 0x1c, 0x36, 0x02, 0x28, 0x08, 0xff, 0x3f, 0x7a, 0x10, - 0xf3, 0x5a, 0x00, 0x00, 0x18, 0x08, 0x01, 0xc0, 0x85, 0xef, 0x0c, 0xa5, 0xff, 0xff, 0x00, - 0x00, 0x00, 0xa8, 0x03, 0xf4, 0xa7, 0x01, 0x04, 0x00, 0x00, 0x00, 0x64, 0x01, 0x03, 0x06, - 0x55, 0x6f, 0xc5, 0xe4, 0x9f, 0x1a, 0x00, 0x00, 0x47, 0x89, 0x0a, 0xe8, 0x58, 0xf0, 0xff, - 0xff, 0x63, 0xee, 0x21, 0xdd, 0xdd, 0xca, 0xff, 0xff, 0xbe, 0x6f, 0x3b, 0xaa, 0xe9, 0x3d, - 0x00, 0x00, 0xd6, 0x91, 0x2a, 0xb7, 0x08, 0x02, 0x00, 0x00, 0x75, 0x45, 0x2c, 0xd7, 0x76, - 0x0c, 0x00, 0x00, 0x54, 0x49, 0x92, 0x44, 0x9c, 0xbf, 0xff, 0xff, 0x41, 0xa9, 0x6d, 0xec, - 0x7a, 0xd0, 0xff, 0xff, 0x27, 0xa0, 0x23, 0x41, 0x44, 0xc1, 0xff, 0xff, 0x18, 0xd4, 0xe1, - 0x30, 0xd3, 0xe0, 0xff, 0xff, 0x59, 0xac, 0x14, 0xf4, 0xec, 0x58, 0x00, 0x00, 0x2c, 0x17, - 0x29, 0x57, 0x44, 0x13, 0x00, 0x00, 0xa2, 0x0d, 0x4a, 0xcc, 0x63, 0xff, 0xff, 0xff, 0x81, - 0x33, 0xbc, 0xda, 0xd5, 0xda, 0xff, 0xff, 0x4c, 0x05, 0xf4, 0x78, 0x19, 0xea, 0xff, 0xff, - 0x06, 0x71, 0x25, 0xde, 0x5a, 0xaf, 0xff, 0xff, 0x95, 0x32, 0x5f, 0x76, 0x98, 0xb3, 0xff, - 0xff, 0xf1, 0x34, 0x3c, 0xbf, 0xa8, 0xbe, 0xff, 0xff, 0x27, 0x73, 0x40, 0x0c, 0x7d, 0xcd, - 0xff, 0xff, 0x68, 0xa9, 0xc2, 0xe9, 0x2c, 0x03, 0x00, 0x00, 0x3f, 0x79, 0xd9, 0x04, 0x8c, - 0xe5, 0xff, 0xff, 0x91, 0xb4, 0x9b, 0xe3, 0x8f, 0x21, 0x00, 0x00, 0xb8, 0x20, 0xc8, 0xc2, - 0x4d, 0xa6, 0xff, 0xff, 0x47, 0xfa, 0xde, 0x36, 0x4a, 0xf3, 0xff, 0xff, 0x72, 0x80, 0x94, - 0x59, 0xdd, 0x4e, 0x00, 0x00, 0x29, 0xe4, 0xd6, 0x43, 0xb0, 0xf0, 0xff, 0xff, 0x68, 0x36, - 0xbc, 0x2d, 0xd1, 0xa9, 0xff, 0xff, 0xbc, 0xe4, 0xbe, 0xd7, 0xed, 0x1b, 0x00, 0x00, 0x02, - 0x8b, 
0xcb, 0xd7, 0xed, 0x47, 0x00, 0x00, 0x3c, 0x06, 0xe4, 0xda, 0xc7, 0x47, 0x00, 0x00, - 0xf3, 0x39, 0x55, 0x28, 0x97, 0xba, 0xff, 0xff, 0x07, 0x79, 0x38, 0x4e, 0xe0, 0x21, 0x00, - 0x00, 0xde, 0xed, 0x1c, 0x23, 0x09, 0x49, 0x00, 0x00, 0x49, 0x46, 0x49, 0x5d, 0x8f, 0x34, - 0x00, 0x00, 0x38, 0x18, 0x50, 0xf6, 0xa1, 0x11, 0x00, 0x00, 0xdf, 0xb8, 0x19, 0x14, 0xd1, - 0xe1, 0xff, 0xff, 0x2c, 0x56, 0x72, 0x93, 0x64, 0x3f, 0x00, 0x00, 0x1c, 0xe0, 0xbe, 0x87, - 0x7d, 0xf9, 0xff, 0xff, 0x73, 0x0e, 0x3c, 0x01, 0x91, 0xf9, 0xff, 0xff, 0xb2, 0x37, 0x85, - 0x81, 0x5f, 0x54, 0x00, 0x00, 0x58, 0x44, 0xb0, 0x1a, 0xac, 0xbb, 0xff, 0xff, 0x36, 0xbf, - 0xbe, 0x5e, 0x22, 0xff, 0xff, 0xff, 0x06, 0x20, 0xa0, 0x23, 0x0d, 0x3b, 0x00, 0x00, 0x19, - 0xc6, 0x49, 0x0a, 0x00, 0xcf, 0xff, 0xff, 0x4f, 0xcd, 0xc6, 0x95, 0x4b, 0xf1, 0xff, 0xff, - 0xa3, 0x59, 0xaf, 0x65, 0xec, 0xe9, 0xff, 0xff, 0x58, 0xef, 0x05, 0x50, 0x63, 0xe4, 0xff, - 0xff, 0xc7, 0x6a, 0x9e, 0xf1, 0x69, 0x20, 0x00, 0x00, 0xd1, 0xb3, 0xc9, 0x14, 0xb2, 0x29, - 0x00, 0x00, 0x1d, 0x48, 0x16, 0x70, 0xf0, 0x40, 0x00, 0x00, 0x01, 0xc0, 0x85, 0xef, 0x0c, - 0xa5, 0xff, 0xff, 0xff, 0x3f, 0x7a, 0x10, 0xf3, 0x5a, 0x00, 0x00, 0x15, 0x00, 0x15, 0x90, - 0x0d, 0x15, 0x9a, 0x0d, 0x2c, 0x15, 0x6a, 0x15, 0x00, 0x15, 0x06, 0x15, 0x08, 0x1c, 0x36, - 0x02, 0x28, 0x10, 0x4b, 0x3b, 0x4c, 0xa8, 0x5a, 0x86, 0xc4, 0x7a, 0x09, 0x8a, 0x22, 0x3f, - 0xff, 0xff, 0xff, 0xff, 0x18, 0x10, 0xb4, 0xc4, 0xb3, 0x57, 0xa5, 0x79, 0x3b, 0x85, 0xf6, - 0x75, 0xdd, 0xc0, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xc8, 0x06, 0xf4, 0x47, 0x03, - 0x04, 0x00, 0x00, 0x00, 0x64, 0x01, 0x03, 0x06, 0x05, 0x49, 0xf7, 0xfc, 0x89, 0x3d, 0x3e, - 0x20, 0x07, 0x72, 0x3e, 0xa1, 0x66, 0x81, 0x67, 0x80, 0x23, 0x78, 0x06, 0x68, 0x0e, 0x78, - 0xf5, 0x08, 0xed, 0x20, 0xcd, 0x0e, 0x7f, 0x9c, 0x70, 0xa0, 0xb9, 0x16, 0x44, 0xb2, 0x41, - 0x62, 0xba, 0x82, 0xad, 0xe1, 0x12, 0x9b, 0xa6, 0x53, 0x8d, 0x20, 0x27, 0xd5, 0x84, 0x63, - 0xb8, 0x07, 0x4b, 0x5b, 0xa4, 0x1c, 0xa4, 0x1c, 0x17, 0xbf, 0x4b, 0x00, 0x24, 0x04, 0x56, - 0xa8, 0x52, 0xaf, 0x33, 0xf7, 0xad, 0x7c, 0xc8, 0x83, 0x25, 0x13, 0xaf, 0x80, 0x25, 0x6f, - 0xbd, 0xd1, 0x15, 0x69, 0x64, 0x20, 0x7b, 0xd7, 0x33, 0xba, 0x66, 0x29, 0x8a, 0x00, 0xda, - 0x42, 0x07, 0x2c, 0x6c, 0x39, 0x76, 0x9f, 0xdc, 0x17, 0xad, 0xb6, 0x58, 0xdf, 0x5f, 0x00, - 0x18, 0x3a, 0xae, 0x1c, 0xd6, 0x5f, 0x9d, 0x78, 0x8d, 0x73, 0xdd, 0x3e, 0xd6, 0x18, 0x33, - 0x40, 0xe4, 0x36, 0xde, 0xb0, 0xb7, 0x33, 0x2a, 0x6b, 0x08, 0x03, 0x6c, 0x6d, 0x8f, 0x13, - 0x93, 0xd0, 0xd7, 0x87, 0x62, 0x63, 0x53, 0xfb, 0xd8, 0xbb, 0xc9, 0x54, 0x90, 0xd6, 0xa9, - 0x8f, 0xc8, 0x60, 0xbd, 0xec, 0x75, 0x23, 0x9a, 0x21, 0xec, 0xe4, 0x86, 0x43, 0xd7, 0xc1, - 0x88, 0xdc, 0x82, 0x00, 0x32, 0x79, 0xc9, 0x2b, 0x70, 0x85, 0xb7, 0x25, 0xa1, 0xcc, 0x7d, - 0x0b, 0x29, 0x03, 0xea, 0x80, 0xff, 0x9b, 0xf3, 0x24, 0x7f, 0xd1, 0xff, 0xf0, 0x22, 0x65, - 0x85, 0x99, 0x17, 0x63, 0xc2, 0xc0, 0xb7, 0x62, 0x05, 0xda, 0x7a, 0xa0, 0xc3, 0x2a, 0x6f, - 0x1f, 0xee, 0x1f, 0x31, 0xa8, 0x42, 0x80, 0xe4, 0xb7, 0x6c, 0xf6, 0xac, 0x47, 0xb0, 0x17, - 0x69, 0xcb, 0xff, 0x66, 0x8a, 0xd6, 0x25, 0x00, 0xf3, 0xcf, 0x0a, 0xaf, 0xf8, 0x92, 0x8a, - 0xa0, 0xdf, 0x71, 0x13, 0x8d, 0x9d, 0xff, 0x7e, 0xe0, 0x0a, 0x52, 0xf1, 0x97, 0x01, 0xa9, - 0x73, 0x27, 0xfd, 0x63, 0x58, 0x00, 0x32, 0xa6, 0xf6, 0x78, 0xb8, 0xe4, 0xfd, 0x20, 0x7c, - 0x90, 0xee, 0xad, 0x8c, 0xc9, 0x71, 0x35, 0x66, 0x71, 0x3c, 0xe0, 0xe4, 0x0b, 0xbb, 0xa0, - 0x50, 0xe9, 0xf2, 0x81, 0x1d, 0x3a, 0x95, 0x94, 0x00, 0xd5, 0x49, 0x00, 0x07, 0xdf, 0x21, - 0x53, 0x36, 0x8d, 0x9e, 0xd9, 0xa5, 0x52, 0x4d, 0x0d, 0x29, 
0x74, 0xf0, 0x40, 0xbd, 0xda, - 0x63, 0x4e, 0xdd, 0x91, 0x8e, 0xa6, 0xa7, 0xf6, 0x78, 0x58, 0x3b, 0x0a, 0x5c, 0x60, 0x3c, - 0x15, 0x34, 0xf8, 0x2c, 0x21, 0xe3, 0x56, 0x1b, 0x9e, 0xd9, 0x56, 0xd3, 0x13, 0x2e, 0x80, - 0x2c, 0x36, 0xda, 0x1d, 0xc8, 0xfb, 0x52, 0xee, 0x17, 0xb3, 0x2b, 0xf3, 0xd2, 0xeb, 0x29, - 0xa0, 0x37, 0xa0, 0x12, 0xce, 0x1c, 0x50, 0x6a, 0xf4, 0x11, 0xcd, 0x96, 0x88, 0x3f, 0x43, - 0x78, 0xc0, 0x2c, 0x53, 0x6c, 0xa6, 0xdf, 0xb9, 0x9e, 0x93, 0xd4, 0x1e, 0xa9, 0x7f, 0x67, - 0xa6, 0xc1, 0x80, 0x46, 0x0f, 0x63, 0x7d, 0x15, 0xf2, 0x4c, 0xc5, 0xda, 0x11, 0x9a, 0x20, - 0x67, 0x27, 0xe8, 0x00, 0xec, 0x03, 0x1d, 0x15, 0xa7, 0x92, 0xb3, 0x1f, 0xda, 0x20, 0x92, - 0xd8, 0x00, 0xfb, 0x06, 0x80, 0xeb, 0x4b, 0x0c, 0xc1, 0x1f, 0x49, 0x40, 0x06, 0x8d, 0x8a, - 0xf8, 0x34, 0xb1, 0x0c, 0x1d, 0x20, 0xd0, 0x47, 0xe5, 0xb1, 0x7e, 0xf7, 0xe4, 0xb4, 0x7e, - 0x9c, 0x84, 0x18, 0x61, 0x32, 0x4f, 0xc0, 0xc2, 0xb2, 0xcc, 0x63, 0xf6, 0xe1, 0x16, 0xd6, - 0xd9, 0x4b, 0x74, 0x13, 0x01, 0xa1, 0xe2, 0x00, 0xb7, 0x9e, 0xc1, 0x3a, 0xc5, 0xaf, 0xe8, - 0x54, 0x07, 0x2a, 0x20, 0xfd, 0x2c, 0x6f, 0xb9, 0x80, 0x18, 0x92, 0x87, 0xa0, 0x81, 0x24, - 0x60, 0x47, 0x17, 0x4f, 0xbc, 0xbe, 0xf5, 0x03, 0x69, 0x80, 0xe3, 0x10, 0x54, 0xd6, 0x68, - 0x7d, 0x75, 0xd3, 0x0a, 0x45, 0x38, 0x9e, 0xa9, 0xfd, 0x05, 0x40, 0xd2, 0x1e, 0x6f, 0x5c, - 0x30, 0x10, 0xfe, 0x9b, 0x9f, 0x6d, 0xc0, 0x9d, 0x6c, 0x17, 0x7d, 0x00, 0x09, 0xb6, 0x8a, - 0x31, 0x8e, 0x1b, 0x6b, 0x84, 0x1e, 0x79, 0xce, 0x10, 0x55, 0x59, 0x6a, 0x40, 0x16, 0xdc, - 0x9a, 0xcf, 0x4d, 0xb0, 0x8f, 0xac, 0xe3, 0x8d, 0xee, 0xd2, 0xef, 0x01, 0x8c, 0xe0, 0x2b, - 0x24, 0xe5, 0xb4, 0xe1, 0x86, 0x72, 0x00, 0x30, 0x07, 0xce, 0x02, 0x23, 0x41, 0x33, 0x40, - 0xf0, 0x9b, 0xc2, 0x2d, 0x30, 0xec, 0x3b, 0x17, 0xb2, 0x8f, 0x64, 0x7d, 0xcd, 0x70, 0x9e, - 0x80, 0x22, 0xb5, 0xdf, 0x6d, 0x2a, 0x43, 0xd4, 0x2b, 0x5a, 0xf6, 0x96, 0xa6, 0xea, 0x91, - 0x62, 0x80, 0x39, 0xf2, 0x5a, 0x8e, 0xc0, 0xb9, 0x29, 0x99, 0x17, 0xe7, 0x35, 0x2c, 0xf6, - 0x4d, 0x18, 0x00, 0x48, 0x10, 0x85, 0xb4, 0x3f, 0x89, 0x60, 0x49, 0x6e, 0xf0, 0xcd, 0x9d, - 0x92, 0xeb, 0x96, 0x80, 0xcf, 0xf9, 0xf1, 0x46, 0x1d, 0xc0, 0x49, 0xb3, 0x36, 0x2e, 0x24, - 0xc8, 0xdb, 0x41, 0x72, 0x20, 0xf5, 0xde, 0x5c, 0xf9, 0x4a, 0x6e, 0xa0, 0x0b, 0x13, 0xfc, - 0x2d, 0x17, 0x07, 0x16, 0x5e, 0x00, 0x3c, 0x54, 0x41, 0x0e, 0xa2, 0x0d, 0xf3, 0x48, 0x12, - 0x2e, 0x7c, 0xab, 0x3c, 0x59, 0x1c, 0x40, 0xca, 0xb0, 0x71, 0xc7, 0x29, 0xf0, 0xbb, 0x9f, - 0xf4, 0x3f, 0x25, 0x49, 0xad, 0xc2, 0x8f, 0x80, 0x04, 0x38, 0x6d, 0x35, 0x02, 0xca, 0xe6, - 0x02, 0x83, 0x89, 0x4e, 0x74, 0xdb, 0x08, 0x5a, 0x80, 0x13, 0x99, 0xd4, 0x26, 0xc1, 0x27, - 0xce, 0xb0, 0x98, 0x99, 0xca, 0xf6, 0x3e, 0x50, 0x49, 0xd0, 0xbf, 0xcb, 0x6f, 0xbe, 0x5b, - 0x92, 0x63, 0xde, 0x94, 0xd3, 0x8f, 0x07, 0x06, 0x0f, 0x2b, 0x80, 0x36, 0xf1, 0x77, 0xf6, - 0x29, 0x33, 0x13, 0xa9, 0x4a, 0x55, 0x3d, 0x6c, 0xca, 0xdb, 0x4e, 0x40, 0xc4, 0x95, 0x54, - 0xf4, 0xe2, 0x8c, 0x1b, 0xa0, 0xfe, 0x30, 0x50, 0x9d, 0x62, 0xbc, 0x5c, 0x00, 0xb4, 0xc4, - 0xb3, 0x57, 0xa5, 0x79, 0x3b, 0x85, 0xf6, 0x75, 0xdd, 0xc0, 0x00, 0x00, 0x00, 0x01, 0x4b, - 0x3b, 0x4c, 0xa8, 0x5a, 0x86, 0xc4, 0x7a, 0x09, 0x8a, 0x22, 0x3f, 0xff, 0xff, 0xff, 0xff, - 0x15, 0x02, 0x19, 0x4c, 0x48, 0x0c, 0x73, 0x70, 0x61, 0x72, 0x6b, 0x5f, 0x73, 0x63, 0x68, - 0x65, 0x6d, 0x61, 0x15, 0x06, 0x00, 0x15, 0x02, 0x25, 0x02, 0x18, 0x06, 0x64, 0x65, 0x63, - 0x37, 0x70, 0x34, 0x25, 0x0a, 0x15, 0x08, 0x15, 0x0e, 0x00, 0x15, 0x04, 0x25, 0x02, 0x18, - 0x07, 0x64, 0x65, 0x63, 0x31, 0x34, 0x70, 0x35, 0x25, 0x0a, 0x15, 0x0a, 0x15, 0x1c, 0x00, - 0x15, 0x0e, 0x15, 0x20, 
0x15, 0x02, 0x18, 0x08, 0x64, 0x65, 0x63, 0x33, 0x38, 0x70, 0x31, - 0x38, 0x25, 0x0a, 0x15, 0x24, 0x15, 0x4c, 0x00, 0x16, 0x6a, 0x19, 0x1c, 0x19, 0x3c, 0x26, - 0x08, 0x1c, 0x15, 0x02, 0x19, 0x35, 0x06, 0x08, 0x00, 0x19, 0x18, 0x06, 0x64, 0x65, 0x63, - 0x37, 0x70, 0x34, 0x15, 0x02, 0x16, 0x6a, 0x16, 0xf6, 0x03, 0x16, 0xfe, 0x03, 0x26, 0x08, - 0x3c, 0x36, 0x02, 0x28, 0x04, 0x7f, 0x96, 0x98, 0x00, 0x18, 0x04, 0x81, 0x69, 0x67, 0xff, - 0x00, 0x19, 0x1c, 0x15, 0x00, 0x15, 0x00, 0x15, 0x02, 0x00, 0x00, 0x00, 0x26, 0x86, 0x04, - 0x1c, 0x15, 0x04, 0x19, 0x35, 0x06, 0x08, 0x00, 0x19, 0x18, 0x07, 0x64, 0x65, 0x63, 0x31, - 0x34, 0x70, 0x35, 0x15, 0x02, 0x16, 0x6a, 0x16, 0xa6, 0x07, 0x16, 0xb0, 0x07, 0x26, 0x86, - 0x04, 0x3c, 0x36, 0x02, 0x28, 0x08, 0xff, 0x3f, 0x7a, 0x10, 0xf3, 0x5a, 0x00, 0x00, 0x18, - 0x08, 0x01, 0xc0, 0x85, 0xef, 0x0c, 0xa5, 0xff, 0xff, 0x00, 0x19, 0x1c, 0x15, 0x00, 0x15, - 0x00, 0x15, 0x02, 0x00, 0x00, 0x00, 0x26, 0xb6, 0x0b, 0x1c, 0x15, 0x0e, 0x19, 0x35, 0x06, - 0x08, 0x00, 0x19, 0x18, 0x08, 0x64, 0x65, 0x63, 0x33, 0x38, 0x70, 0x31, 0x38, 0x15, 0x02, - 0x16, 0x6a, 0x16, 0x86, 0x0e, 0x16, 0x90, 0x0e, 0x26, 0xb6, 0x0b, 0x3c, 0x36, 0x02, 0x28, - 0x10, 0x4b, 0x3b, 0x4c, 0xa8, 0x5a, 0x86, 0xc4, 0x7a, 0x09, 0x8a, 0x22, 0x3f, 0xff, 0xff, - 0xff, 0xff, 0x18, 0x10, 0xb4, 0xc4, 0xb3, 0x57, 0xa5, 0x79, 0x3b, 0x85, 0xf6, 0x75, 0xdd, - 0xc0, 0x00, 0x00, 0x00, 0x01, 0x00, 0x19, 0x1c, 0x15, 0x00, 0x15, 0x00, 0x15, 0x02, 0x00, - 0x00, 0x00, 0x16, 0xa2, 0x19, 0x16, 0x6a, 0x00, 0x19, 0x2c, 0x18, 0x18, 0x6f, 0x72, 0x67, - 0x2e, 0x61, 0x70, 0x61, 0x63, 0x68, 0x65, 0x2e, 0x73, 0x70, 0x61, 0x72, 0x6b, 0x2e, 0x76, - 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x05, 0x33, 0x2e, 0x30, 0x2e, 0x31, 0x00, 0x18, - 0x29, 0x6f, 0x72, 0x67, 0x2e, 0x61, 0x70, 0x61, 0x63, 0x68, 0x65, 0x2e, 0x73, 0x70, 0x61, - 0x72, 0x6b, 0x2e, 0x73, 0x71, 0x6c, 0x2e, 0x70, 0x61, 0x72, 0x71, 0x75, 0x65, 0x74, 0x2e, - 0x72, 0x6f, 0x77, 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0xf4, 0x01, - 0x7b, 0x22, 0x74, 0x79, 0x70, 0x65, 0x22, 0x3a, 0x22, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, - 0x22, 0x2c, 0x22, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x22, 0x3a, 0x5b, 0x7b, 0x22, 0x6e, - 0x61, 0x6d, 0x65, 0x22, 0x3a, 0x22, 0x64, 0x65, 0x63, 0x37, 0x70, 0x34, 0x22, 0x2c, 0x22, - 0x74, 0x79, 0x70, 0x65, 0x22, 0x3a, 0x22, 0x64, 0x65, 0x63, 0x69, 0x6d, 0x61, 0x6c, 0x28, - 0x37, 0x2c, 0x34, 0x29, 0x22, 0x2c, 0x22, 0x6e, 0x75, 0x6c, 0x6c, 0x61, 0x62, 0x6c, 0x65, - 0x22, 0x3a, 0x74, 0x72, 0x75, 0x65, 0x2c, 0x22, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, - 0x61, 0x22, 0x3a, 0x7b, 0x7d, 0x7d, 0x2c, 0x7b, 0x22, 0x6e, 0x61, 0x6d, 0x65, 0x22, 0x3a, - 0x22, 0x64, 0x65, 0x63, 0x31, 0x34, 0x70, 0x35, 0x22, 0x2c, 0x22, 0x74, 0x79, 0x70, 0x65, - 0x22, 0x3a, 0x22, 0x64, 0x65, 0x63, 0x69, 0x6d, 0x61, 0x6c, 0x28, 0x31, 0x34, 0x2c, 0x35, - 0x29, 0x22, 0x2c, 0x22, 0x6e, 0x75, 0x6c, 0x6c, 0x61, 0x62, 0x6c, 0x65, 0x22, 0x3a, 0x74, - 0x72, 0x75, 0x65, 0x2c, 0x22, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x22, 0x3a, - 0x7b, 0x7d, 0x7d, 0x2c, 0x7b, 0x22, 0x6e, 0x61, 0x6d, 0x65, 0x22, 0x3a, 0x22, 0x64, 0x65, - 0x63, 0x33, 0x38, 0x70, 0x31, 0x38, 0x22, 0x2c, 0x22, 0x74, 0x79, 0x70, 0x65, 0x22, 0x3a, - 0x22, 0x64, 0x65, 0x63, 0x69, 0x6d, 0x61, 0x6c, 0x28, 0x33, 0x38, 0x2c, 0x31, 0x38, 0x29, - 0x22, 0x2c, 0x22, 0x6e, 0x75, 0x6c, 0x6c, 0x61, 0x62, 0x6c, 0x65, 0x22, 0x3a, 0x74, 0x72, - 0x75, 0x65, 0x2c, 0x22, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x22, 0x3a, 0x7b, - 0x7d, 0x7d, 0x5d, 0x7d, 0x00, 0x18, 0x4a, 0x70, 0x61, 0x72, 0x71, 0x75, 0x65, 
0x74, 0x2d, - 0x6d, 0x72, 0x20, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x20, 0x31, 0x2e, 0x31, 0x30, - 0x2e, 0x31, 0x20, 0x28, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x20, 0x61, 0x38, 0x39, 0x64, 0x66, - 0x38, 0x66, 0x39, 0x39, 0x33, 0x32, 0x62, 0x36, 0x65, 0x66, 0x36, 0x36, 0x33, 0x33, 0x64, - 0x30, 0x36, 0x30, 0x36, 0x39, 0x65, 0x35, 0x30, 0x63, 0x39, 0x62, 0x37, 0x39, 0x37, 0x30, - 0x62, 0x65, 0x62, 0x64, 0x31, 0x29, 0x19, 0x3c, 0x1c, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x1c, - 0x00, 0x00, 0x00, 0xd3, 0x02, 0x00, 0x00, 0x50, 0x41, 0x52, 0x31}; - unsigned int decimals_parquet_len = 2366; - - cudf::io::parquet_reader_options read_opts = cudf::io::parquet_reader_options::builder( - cudf::io::source_info{reinterpret_cast(decimals_parquet), decimals_parquet_len}); - auto result = cudf::io::read_parquet(read_opts); - - auto validity = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 50; }); - - EXPECT_EQ(result.tbl->view().num_columns(), 3); - - int32_t col0_data[] = { - -2354584, -190275, 8393572, 6446515, -5687920, -1843550, -6897687, -6780385, 3428529, - 5842056, -4312278, -4450603, -7516141, 2974667, -4288640, 1065090, -9410428, 7891355, - 1076244, -1975984, 6999466, 2666959, 9262967, 7931374, -1370640, 451074, 8799111, - 3026424, -6803730, 5098297, -1414370, -9662848, 2499991, 658765, 8348874, -6177036, - -9694494, -5343299, 3558393, -8789072, 2697890, -4454707, 8299309, -6223703, -3112513, - 7537487, 825776, -495683, 328299, -4529727, 0, -9999999, 9999999}; - - EXPECT_EQ(static_cast(result.tbl->view().column(0).size()), - sizeof(col0_data) / sizeof(col0_data[0])); - cudf::test::fixed_point_column_wrapper col0( - std::begin(col0_data), std::end(col0_data), validity, numeric::scale_type{-4}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), col0); - - int64_t col1_data[] = {29274040266581, -17210335917753, -58420730139037, - 68073792696254, 2236456014294, 13704555677045, - -70797090469548, -52248605513407, -68976081919961, - -34277313883112, 97774730521689, 21184241014572, - -670882460254, -40862944054399, -24079852370612, - -88670167797498, -84007574359403, -71843004533519, - -55538016554201, 3491435293032, -29085437167297, - 36901882672273, -98622066122568, -13974902998457, - 86712597643378, -16835133643735, -94759096142232, - 30708340810940, 79086853262082, 78923696440892, - -76316597208589, 37247268714759, 80303592631774, - 57790350050889, 19387319851064, -33186875066145, - 69701203023404, -7157433049060, -7073790423437, - 92769171617714, -75127120182184, -951893180618, - 64927618310150, -53875897154023, -16168039035569, - -24273449166429, -30359781249192, 35639397345991, - 45844829680593, 71401416837149, 0, - -99999999999999, 99999999999999}; - - EXPECT_EQ(static_cast(result.tbl->view().column(1).size()), - sizeof(col1_data) / sizeof(col1_data[0])); - cudf::test::fixed_point_column_wrapper col1( - std::begin(col1_data), std::end(col1_data), validity, numeric::scale_type{-5}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), col1); - - cudf::io::parquet_reader_options read_strict_opts = read_opts; - read_strict_opts.set_columns({"dec7p4", "dec14p5"}); - EXPECT_NO_THROW(cudf::io::read_parquet(read_strict_opts)); - } - { - // dec7p3: Decimal(precision=7, scale=3) backed by FIXED_LENGTH_BYTE_ARRAY(length = 4) - // dec12p11: Decimal(precision=12, scale=11) backed by FIXED_LENGTH_BYTE_ARRAY(length = 6) - // dec20p1: Decimal(precision=20, scale=1) backed by FIXED_LENGTH_BYTE_ARRAY(length = 9) - const unsigned char fixed_len_bytes_decimal_parquet[] 
= { - 0x50, 0x41, 0x52, 0x31, 0x15, 0x00, 0x15, 0xA8, 0x01, 0x15, 0xAE, 0x01, 0x2C, 0x15, 0x28, - 0x15, 0x00, 0x15, 0x06, 0x15, 0x08, 0x1C, 0x36, 0x02, 0x28, 0x04, 0x00, 0x97, 0x45, 0x72, - 0x18, 0x04, 0x00, 0x01, 0x81, 0x3B, 0x00, 0x00, 0x00, 0x54, 0xF0, 0x53, 0x04, 0x00, 0x00, - 0x00, 0x26, 0x01, 0x03, 0x00, 0x00, 0x61, 0x10, 0xCF, 0x00, 0x0A, 0xA9, 0x08, 0x00, 0x77, - 0x58, 0x6F, 0x00, 0x6B, 0xEE, 0xA4, 0x00, 0x92, 0xF8, 0x94, 0x00, 0x2E, 0x18, 0xD4, 0x00, - 0x4F, 0x45, 0x33, 0x00, 0x97, 0x45, 0x72, 0x00, 0x0D, 0xC2, 0x75, 0x00, 0x76, 0xAA, 0xAA, - 0x00, 0x30, 0x9F, 0x86, 0x00, 0x4B, 0x9D, 0xB1, 0x00, 0x4E, 0x4B, 0x3B, 0x00, 0x01, 0x81, - 0x3B, 0x00, 0x22, 0xD4, 0x53, 0x00, 0x72, 0xC4, 0xAF, 0x00, 0x43, 0x9B, 0x72, 0x00, 0x1D, - 0x91, 0xC3, 0x00, 0x45, 0x27, 0x48, 0x15, 0x00, 0x15, 0xF4, 0x01, 0x15, 0xFA, 0x01, 0x2C, - 0x15, 0x28, 0x15, 0x00, 0x15, 0x06, 0x15, 0x08, 0x1C, 0x36, 0x02, 0x28, 0x06, 0x00, 0xD5, - 0xD7, 0x31, 0x99, 0xA6, 0x18, 0x06, 0xFF, 0x17, 0x2B, 0x5A, 0xF0, 0x01, 0x00, 0x00, 0x00, - 0x7A, 0xF0, 0x79, 0x04, 0x00, 0x00, 0x00, 0x24, 0x01, 0x03, 0x02, 0x00, 0x54, 0x23, 0xCF, - 0x13, 0x0A, 0x00, 0x07, 0x22, 0xB1, 0x21, 0x7E, 0x00, 0x64, 0x19, 0xD6, 0xD2, 0xA5, 0x00, - 0x61, 0x7F, 0xF6, 0xB9, 0xB0, 0x00, 0xD0, 0x7F, 0x9C, 0xA9, 0xE9, 0x00, 0x65, 0x58, 0xF0, - 0xAD, 0xFB, 0x00, 0xBC, 0x61, 0xE2, 0x03, 0xDA, 0xFF, 0x17, 0x2B, 0x5A, 0xF0, 0x01, 0x00, - 0x63, 0x4B, 0x4C, 0xFE, 0x45, 0x00, 0x7A, 0xA0, 0xD8, 0xD1, 0xC0, 0x00, 0xC0, 0x63, 0xF7, - 0x9D, 0x0A, 0x00, 0x88, 0x22, 0x0F, 0x1B, 0x25, 0x00, 0x1A, 0x80, 0x56, 0x34, 0xC7, 0x00, - 0x5F, 0x48, 0x61, 0x09, 0x7C, 0x00, 0x61, 0xEF, 0x92, 0x42, 0x2F, 0x00, 0xD5, 0xD7, 0x31, - 0x99, 0xA6, 0xFF, 0x17, 0x2B, 0x5A, 0xF0, 0x01, 0x00, 0x71, 0xDD, 0xE2, 0x22, 0x7B, 0x00, - 0x54, 0xBF, 0xAE, 0xE9, 0x3C, 0x15, 0x00, 0x15, 0xD4, 0x02, 0x15, 0xDC, 0x02, 0x2C, 0x15, - 0x28, 0x15, 0x00, 0x15, 0x06, 0x15, 0x08, 0x1C, 0x36, 0x04, 0x28, 0x09, 0x00, 0x7D, 0xFE, - 0x02, 0xDA, 0xB2, 0x62, 0xA3, 0xFB, 0x18, 0x09, 0x00, 0x03, 0x9C, 0xCD, 0x5A, 0xAC, 0xBB, - 0xF1, 0xE3, 0x00, 0x00, 0x00, 0xAA, 0x01, 0xF0, 0xA9, 0x04, 0x00, 0x00, 0x00, 0x07, 0xBF, - 0xBF, 0x0F, 0x00, 0x7D, 0xFE, 0x02, 0xDA, 0xB2, 0x62, 0xA3, 0xFB, 0x00, 0x7D, 0x9A, 0xCB, - 0xDA, 0x4B, 0x10, 0x8B, 0xAC, 0x00, 0x20, 0xBA, 0x97, 0x87, 0x2E, 0x3B, 0x4E, 0x04, 0x00, - 0x15, 0xBB, 0xC2, 0xDF, 0x2D, 0x25, 0x08, 0xB6, 0x00, 0x5C, 0x67, 0x0E, 0x36, 0x30, 0xF1, - 0xAC, 0xA4, 0x00, 0x44, 0xF1, 0x8E, 0xFB, 0x17, 0x5E, 0xE1, 0x96, 0x00, 0x64, 0x69, 0xF9, - 0x66, 0x3F, 0x11, 0xED, 0xB9, 0x00, 0x45, 0xB5, 0xDA, 0x14, 0x9C, 0xA3, 0xFA, 0x64, 0x00, - 0x26, 0x5F, 0xDE, 0xD7, 0x67, 0x95, 0xEF, 0xB1, 0x00, 0x35, 0xDB, 0x9B, 0x88, 0x46, 0xD0, - 0xA1, 0x0E, 0x00, 0x45, 0xA9, 0x92, 0x8E, 0x89, 0xD1, 0xAC, 0x4C, 0x00, 0x4C, 0xF1, 0xCB, - 0x27, 0x82, 0x3A, 0x7D, 0xB7, 0x00, 0x64, 0xD3, 0xD2, 0x2F, 0x9C, 0x83, 0x16, 0x75, 0x00, - 0x15, 0xDF, 0xC2, 0xA9, 0x63, 0xB8, 0x33, 0x65, 0x00, 0x27, 0x40, 0x28, 0x97, 0x05, 0x8E, - 0xE3, 0x46, 0x00, 0x03, 0x9C, 0xCD, 0x5A, 0xAC, 0xBB, 0xF1, 0xE3, 0x00, 0x22, 0x23, 0xF5, - 0xE8, 0x9D, 0x55, 0xD4, 0x9C, 0x00, 0x25, 0xB9, 0xD8, 0x87, 0x2D, 0xF1, 0xF2, 0x17, 0x15, - 0x02, 0x19, 0x4C, 0x48, 0x0C, 0x73, 0x70, 0x61, 0x72, 0x6B, 0x5F, 0x73, 0x63, 0x68, 0x65, - 0x6D, 0x61, 0x15, 0x06, 0x00, 0x15, 0x0E, 0x15, 0x08, 0x15, 0x02, 0x18, 0x06, 0x64, 0x65, - 0x63, 0x37, 0x70, 0x33, 0x25, 0x0A, 0x15, 0x06, 0x15, 0x0E, 0x00, 0x15, 0x0E, 0x15, 0x0C, - 0x15, 0x02, 0x18, 0x08, 0x64, 0x65, 0x63, 0x31, 0x32, 0x70, 0x31, 0x31, 0x25, 0x0A, 0x15, - 0x16, 0x15, 0x18, 0x00, 0x15, 0x0E, 0x15, 0x12, 
0x15, 0x02, 0x18, 0x07, 0x64, 0x65, 0x63, - 0x32, 0x30, 0x70, 0x31, 0x25, 0x0A, 0x15, 0x02, 0x15, 0x28, 0x00, 0x16, 0x28, 0x19, 0x1C, - 0x19, 0x3C, 0x26, 0x08, 0x1C, 0x15, 0x0E, 0x19, 0x35, 0x06, 0x08, 0x00, 0x19, 0x18, 0x06, - 0x64, 0x65, 0x63, 0x37, 0x70, 0x33, 0x15, 0x02, 0x16, 0x28, 0x16, 0xEE, 0x01, 0x16, 0xF4, - 0x01, 0x26, 0x08, 0x3C, 0x36, 0x02, 0x28, 0x04, 0x00, 0x97, 0x45, 0x72, 0x18, 0x04, 0x00, - 0x01, 0x81, 0x3B, 0x00, 0x19, 0x1C, 0x15, 0x00, 0x15, 0x00, 0x15, 0x02, 0x00, 0x00, 0x00, - 0x26, 0xFC, 0x01, 0x1C, 0x15, 0x0E, 0x19, 0x35, 0x06, 0x08, 0x00, 0x19, 0x18, 0x08, 0x64, - 0x65, 0x63, 0x31, 0x32, 0x70, 0x31, 0x31, 0x15, 0x02, 0x16, 0x28, 0x16, 0xC2, 0x02, 0x16, - 0xC8, 0x02, 0x26, 0xFC, 0x01, 0x3C, 0x36, 0x02, 0x28, 0x06, 0x00, 0xD5, 0xD7, 0x31, 0x99, - 0xA6, 0x18, 0x06, 0xFF, 0x17, 0x2B, 0x5A, 0xF0, 0x01, 0x00, 0x19, 0x1C, 0x15, 0x00, 0x15, - 0x00, 0x15, 0x02, 0x00, 0x00, 0x00, 0x26, 0xC4, 0x04, 0x1C, 0x15, 0x0E, 0x19, 0x35, 0x06, - 0x08, 0x00, 0x19, 0x18, 0x07, 0x64, 0x65, 0x63, 0x32, 0x30, 0x70, 0x31, 0x15, 0x02, 0x16, - 0x28, 0x16, 0xAE, 0x03, 0x16, 0xB6, 0x03, 0x26, 0xC4, 0x04, 0x3C, 0x36, 0x04, 0x28, 0x09, - 0x00, 0x7D, 0xFE, 0x02, 0xDA, 0xB2, 0x62, 0xA3, 0xFB, 0x18, 0x09, 0x00, 0x03, 0x9C, 0xCD, - 0x5A, 0xAC, 0xBB, 0xF1, 0xE3, 0x00, 0x19, 0x1C, 0x15, 0x00, 0x15, 0x00, 0x15, 0x02, 0x00, - 0x00, 0x00, 0x16, 0xDE, 0x07, 0x16, 0x28, 0x00, 0x19, 0x2C, 0x18, 0x18, 0x6F, 0x72, 0x67, - 0x2E, 0x61, 0x70, 0x61, 0x63, 0x68, 0x65, 0x2E, 0x73, 0x70, 0x61, 0x72, 0x6B, 0x2E, 0x76, - 0x65, 0x72, 0x73, 0x69, 0x6F, 0x6E, 0x18, 0x05, 0x33, 0x2E, 0x30, 0x2E, 0x31, 0x00, 0x18, - 0x29, 0x6F, 0x72, 0x67, 0x2E, 0x61, 0x70, 0x61, 0x63, 0x68, 0x65, 0x2E, 0x73, 0x70, 0x61, - 0x72, 0x6B, 0x2E, 0x73, 0x71, 0x6C, 0x2E, 0x70, 0x61, 0x72, 0x71, 0x75, 0x65, 0x74, 0x2E, - 0x72, 0x6F, 0x77, 0x2E, 0x6D, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0xF4, 0x01, - 0x7B, 0x22, 0x74, 0x79, 0x70, 0x65, 0x22, 0x3A, 0x22, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, - 0x22, 0x2C, 0x22, 0x66, 0x69, 0x65, 0x6C, 0x64, 0x73, 0x22, 0x3A, 0x5B, 0x7B, 0x22, 0x6E, - 0x61, 0x6D, 0x65, 0x22, 0x3A, 0x22, 0x64, 0x65, 0x63, 0x37, 0x70, 0x33, 0x22, 0x2C, 0x22, - 0x74, 0x79, 0x70, 0x65, 0x22, 0x3A, 0x22, 0x64, 0x65, 0x63, 0x69, 0x6D, 0x61, 0x6C, 0x28, - 0x37, 0x2C, 0x33, 0x29, 0x22, 0x2C, 0x22, 0x6E, 0x75, 0x6C, 0x6C, 0x61, 0x62, 0x6C, 0x65, - 0x22, 0x3A, 0x74, 0x72, 0x75, 0x65, 0x2C, 0x22, 0x6D, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, - 0x61, 0x22, 0x3A, 0x7B, 0x7D, 0x7D, 0x2C, 0x7B, 0x22, 0x6E, 0x61, 0x6D, 0x65, 0x22, 0x3A, - 0x22, 0x64, 0x65, 0x63, 0x31, 0x32, 0x70, 0x31, 0x31, 0x22, 0x2C, 0x22, 0x74, 0x79, 0x70, - 0x65, 0x22, 0x3A, 0x22, 0x64, 0x65, 0x63, 0x69, 0x6D, 0x61, 0x6C, 0x28, 0x31, 0x32, 0x2C, - 0x31, 0x31, 0x29, 0x22, 0x2C, 0x22, 0x6E, 0x75, 0x6C, 0x6C, 0x61, 0x62, 0x6C, 0x65, 0x22, - 0x3A, 0x74, 0x72, 0x75, 0x65, 0x2C, 0x22, 0x6D, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, - 0x22, 0x3A, 0x7B, 0x7D, 0x7D, 0x2C, 0x7B, 0x22, 0x6E, 0x61, 0x6D, 0x65, 0x22, 0x3A, 0x22, - 0x64, 0x65, 0x63, 0x32, 0x30, 0x70, 0x31, 0x22, 0x2C, 0x22, 0x74, 0x79, 0x70, 0x65, 0x22, - 0x3A, 0x22, 0x64, 0x65, 0x63, 0x69, 0x6D, 0x61, 0x6C, 0x28, 0x32, 0x30, 0x2C, 0x31, 0x29, - 0x22, 0x2C, 0x22, 0x6E, 0x75, 0x6C, 0x6C, 0x61, 0x62, 0x6C, 0x65, 0x22, 0x3A, 0x74, 0x72, - 0x75, 0x65, 0x2C, 0x22, 0x6D, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x22, 0x3A, 0x7B, - 0x7D, 0x7D, 0x5D, 0x7D, 0x00, 0x18, 0x4A, 0x70, 0x61, 0x72, 0x71, 0x75, 0x65, 0x74, 0x2D, - 0x6D, 0x72, 0x20, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6F, 0x6E, 0x20, 0x31, 0x2E, 0x31, 0x30, - 0x2E, 0x31, 
0x20, 0x28, 0x62, 0x75, 0x69, 0x6C, 0x64, 0x20, 0x61, 0x38, 0x39, 0x64, 0x66, - 0x38, 0x66, 0x39, 0x39, 0x33, 0x32, 0x62, 0x36, 0x65, 0x66, 0x36, 0x36, 0x33, 0x33, 0x64, - 0x30, 0x36, 0x30, 0x36, 0x39, 0x65, 0x35, 0x30, 0x63, 0x39, 0x62, 0x37, 0x39, 0x37, 0x30, - 0x62, 0x65, 0x62, 0x64, 0x31, 0x29, 0x19, 0x3C, 0x1C, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x1C, - 0x00, 0x00, 0x00, 0xC5, 0x02, 0x00, 0x00, 0x50, 0x41, 0x52, 0x31, - }; - - unsigned int parquet_len = 1226; - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{ - reinterpret_cast(fixed_len_bytes_decimal_parquet), parquet_len}); - auto result = cudf::io::read_parquet(read_opts); - EXPECT_EQ(result.tbl->view().num_columns(), 3); - - auto validity_c0 = cudf::test::iterators::nulls_at({19}); - int32_t col0_data[] = {6361295, 698632, 7821423, 7073444, 9631892, 3021012, 5195059, - 9913714, 901749, 7776938, 3186566, 4955569, 5131067, 98619, - 2282579, 7521455, 4430706, 1937859, 4532040, 0}; - - EXPECT_EQ(static_cast(result.tbl->view().column(0).size()), - sizeof(col0_data) / sizeof(col0_data[0])); - cudf::test::fixed_point_column_wrapper col0( - std::begin(col0_data), std::end(col0_data), validity_c0, numeric::scale_type{-3}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), col0); - - auto validity_c1 = cudf::test::iterators::nulls_at({18}); - int64_t col1_data[] = {361378026250, - 30646804862, - 429930238629, - 418758703536, - 895494171113, - 435283865083, - 809096053722, - -999999999999, - 426465099333, - 526684574144, - 826310892810, - 584686967589, - 113822282951, - 409236212092, - 420631167535, - 918438386086, - -999999999999, - 489053889147, - 0, - 363993164092}; - - EXPECT_EQ(static_cast(result.tbl->view().column(1).size()), - sizeof(col1_data) / sizeof(col1_data[0])); - cudf::test::fixed_point_column_wrapper col1( - std::begin(col1_data), std::end(col1_data), validity_c1, numeric::scale_type{-11}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), col1); - - auto validity_c2 = cudf::test::iterators::nulls_at({6, 14}); - __int128_t col2_data[] = {9078697037144433659, - 9050770539577117612, - 2358363961733893636, - 1566059559232276662, - 6658306200002735268, - 4967909073046397334, - 0, - 7235588493887532473, - 5023160741463849572, - 2765173712965988273, - 3880866513515749646, - 5019704400576359500, - 5544435986818825655, - 7265381725809874549, - 0, - 1576192427381240677, - 2828305195087094598, - 260308667809395171, - 2460080200895288476, - 2718441925197820439}; - - EXPECT_EQ(static_cast(result.tbl->view().column(2).size()), - sizeof(col2_data) / sizeof(col2_data[0])); - cudf::test::fixed_point_column_wrapper<__int128_t> col2( - std::begin(col2_data), std::end(col2_data), validity_c2, numeric::scale_type{-1}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(2), col2); - } -} - -TEST_F(ParquetReaderTest, EmptyOutput) -{ - cudf::test::fixed_width_column_wrapper c0; - cudf::test::strings_column_wrapper c1; - cudf::test::fixed_point_column_wrapper c2({}, numeric::scale_type{2}); - cudf::test::lists_column_wrapper _c3{{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}}; - auto c3 = cudf::empty_like(_c3); - - cudf::test::fixed_width_column_wrapper sc0; - cudf::test::strings_column_wrapper sc1; - cudf::test::lists_column_wrapper _sc2{{1, 2}}; - std::vector> struct_children; - struct_children.push_back(sc0.release()); - struct_children.push_back(sc1.release()); - struct_children.push_back(cudf::empty_like(_sc2)); - cudf::test::structs_column_wrapper 
c4(std::move(struct_children)); - - table_view expected({c0, c1, c2, *c3, c4}); - - // set precision on the decimal column - cudf::io::table_input_metadata expected_metadata(expected); - expected_metadata.column_metadata[2].set_decimal_precision(1); - - auto filepath = temp_env->get_temp_filepath("EmptyOutput.parquet"); - cudf::io::parquet_writer_options out_args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); - out_args.set_metadata(&expected_metadata); - cudf::io::write_parquet(out_args); - - cudf::io::parquet_reader_options read_args = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(read_args); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); -} - -TEST_F(ParquetWriterTest, RowGroupSizeInvalid) -{ - const auto unused_table = std::make_unique(); - std::vector out_buffer; - - EXPECT_THROW(cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer), - unused_table->view()) - .row_group_size_rows(4999), - cudf::logic_error); - EXPECT_THROW(cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer), - unused_table->view()) - .max_page_size_rows(4999), - cudf::logic_error); - EXPECT_THROW(cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer), - unused_table->view()) - .row_group_size_bytes(3 << 10), - cudf::logic_error); - EXPECT_THROW(cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer), - unused_table->view()) - .max_page_size_bytes(3 << 10), - cudf::logic_error); - - EXPECT_THROW(cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info(&out_buffer)) - .row_group_size_rows(4999), - cudf::logic_error); - EXPECT_THROW(cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info(&out_buffer)) - .max_page_size_rows(4999), - cudf::logic_error); - EXPECT_THROW(cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info(&out_buffer)) - .row_group_size_bytes(3 << 10), - cudf::logic_error); - EXPECT_THROW(cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info(&out_buffer)) - .max_page_size_bytes(3 << 10), - cudf::logic_error); -} - -TEST_F(ParquetWriterTest, RowGroupPageSizeMatch) -{ - const auto unused_table = std::make_unique
(); - std::vector out_buffer; - - auto options = cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer), - unused_table->view()) - .row_group_size_bytes(128 * 1024) - .max_page_size_bytes(512 * 1024) - .row_group_size_rows(10000) - .max_page_size_rows(20000) - .build(); - EXPECT_EQ(options.get_row_group_size_bytes(), options.get_max_page_size_bytes()); - EXPECT_EQ(options.get_row_group_size_rows(), options.get_max_page_size_rows()); -} - -TEST_F(ParquetChunkedWriterTest, RowGroupPageSizeMatch) -{ - std::vector out_buffer; - - auto options = cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info(&out_buffer)) - .row_group_size_bytes(128 * 1024) - .max_page_size_bytes(512 * 1024) - .row_group_size_rows(10000) - .max_page_size_rows(20000) - .build(); - EXPECT_EQ(options.get_row_group_size_bytes(), options.get_max_page_size_bytes()); - EXPECT_EQ(options.get_row_group_size_rows(), options.get_max_page_size_rows()); -} - -TEST_F(ParquetWriterTest, EmptyList) -{ - auto L1 = cudf::make_lists_column(0, - cudf::make_empty_column(cudf::data_type(cudf::type_id::INT32)), - cudf::make_empty_column(cudf::data_type{cudf::type_id::INT64}), - 0, - {}); - auto L0 = cudf::make_lists_column( - 3, cudf::test::fixed_width_column_wrapper{0, 0, 0, 0}.release(), std::move(L1), 0, {}); - - auto filepath = temp_env->get_temp_filepath("EmptyList.parquet"); - cudf::io::write_parquet(cudf::io::parquet_writer_options_builder(cudf::io::sink_info(filepath), - cudf::table_view({*L0}))); - - auto result = cudf::io::read_parquet( - cudf::io::parquet_reader_options_builder(cudf::io::source_info(filepath))); - - using lcw = cudf::test::lists_column_wrapper; - auto expected = lcw{lcw{}, lcw{}, lcw{}}; - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), expected); -} - -TEST_F(ParquetWriterTest, DeepEmptyList) -{ - // Make a list column LLLi st only L is valid and LLi are all null. 
This tests whether we can - // handle multiple nullptr offsets - - auto L2 = cudf::make_lists_column(0, - cudf::make_empty_column(cudf::data_type(cudf::type_id::INT32)), - cudf::make_empty_column(cudf::data_type{cudf::type_id::INT64}), - 0, - {}); - auto L1 = cudf::make_lists_column( - 0, cudf::make_empty_column(cudf::data_type(cudf::type_id::INT32)), std::move(L2), 0, {}); - auto L0 = cudf::make_lists_column( - 3, cudf::test::fixed_width_column_wrapper{0, 0, 0, 0}.release(), std::move(L1), 0, {}); - - auto filepath = temp_env->get_temp_filepath("DeepEmptyList.parquet"); - cudf::io::write_parquet(cudf::io::parquet_writer_options_builder(cudf::io::sink_info(filepath), - cudf::table_view({*L0}))); - - auto result = cudf::io::read_parquet( - cudf::io::parquet_reader_options_builder(cudf::io::source_info(filepath))); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), *L0); -} - -TEST_F(ParquetWriterTest, EmptyListWithStruct) -{ - auto L2 = cudf::make_lists_column(0, - cudf::make_empty_column(cudf::data_type(cudf::type_id::INT32)), - cudf::make_empty_column(cudf::data_type{cudf::type_id::INT64}), - 0, - {}); - - auto children = std::vector>{}; - children.push_back(std::move(L2)); - auto S2 = cudf::make_structs_column(0, std::move(children), 0, {}); - auto L1 = cudf::make_lists_column( - 0, cudf::make_empty_column(cudf::data_type(cudf::type_id::INT32)), std::move(S2), 0, {}); - auto L0 = cudf::make_lists_column( - 3, cudf::test::fixed_width_column_wrapper{0, 0, 0, 0}.release(), std::move(L1), 0, {}); - - auto filepath = temp_env->get_temp_filepath("EmptyListWithStruct.parquet"); - cudf::io::write_parquet(cudf::io::parquet_writer_options_builder(cudf::io::sink_info(filepath), - cudf::table_view({*L0}))); - auto result = cudf::io::read_parquet( - cudf::io::parquet_reader_options_builder(cudf::io::source_info(filepath))); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), *L0); -} - -TEST_F(ParquetWriterTest, CheckPageRows) -{ - auto sequence = thrust::make_counting_iterator(0); - auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); - - constexpr auto page_rows = 5000; - constexpr auto num_rows = 2 * page_rows; - column_wrapper col(sequence, sequence + num_rows, validity); - - auto expected = table_view{{col}}; - - auto const filepath = temp_env->get_temp_filepath("CheckPageRows.parquet"); - const cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .max_page_size_rows(page_rows); - cudf::io::write_parquet(out_opts); - - // check first page header and make sure it has only page_rows values - auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::FileMetaData fmd; - - read_footer(source, &fmd); - CUDF_EXPECTS(fmd.row_groups.size() > 0, "No row groups found"); - CUDF_EXPECTS(fmd.row_groups[0].columns.size() == 1, "Invalid number of columns"); - auto const& first_chunk = fmd.row_groups[0].columns[0].meta_data; - CUDF_EXPECTS(first_chunk.data_page_offset > 0, "Invalid location for first data page"); - - // read first data page header. sizeof(PageHeader) is not exact, but the thrift encoded - // version should be smaller than size of the struct. 
- auto const ph = read_page_header( - source, {first_chunk.data_page_offset, sizeof(cudf::io::parquet::PageHeader), 0}); - - EXPECT_EQ(ph.data_page_header.num_values, page_rows); -} - -TEST_F(ParquetWriterTest, Decimal128Stats) -{ - // check that decimal128 min and max statistics are written in network byte order - // this is negative, so should be the min - std::vector expected_min{ - 0xa1, 0xb2, 0xc3, 0xd4, 0xe5, 0xf6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - std::vector expected_max{ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xa1, 0xb2, 0xc3, 0xd4, 0xe5, 0xf6}; - - __int128_t val0 = 0xa1b2'c3d4'e5f6ULL; - __int128_t val1 = val0 << 80; - column_wrapper col0{{numeric::decimal128(val0, numeric::scale_type{0}), - numeric::decimal128(val1, numeric::scale_type{0})}}; - - auto expected = table_view{{col0}}; - - auto const filepath = temp_env->get_temp_filepath("Decimal128Stats.parquet"); - const cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); - cudf::io::write_parquet(out_opts); - - auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::FileMetaData fmd; - - read_footer(source, &fmd); - - auto const stats = parse_statistics(fmd.row_groups[0].columns[0]); - - EXPECT_EQ(expected_min, stats.min_value); - EXPECT_EQ(expected_max, stats.max_value); -} - -// ============================================================================= -// ---- test data for stats sort order tests -// need at least 3 pages, and min page count is 5000, so need at least 15000 values. -// use 20000 to be safe. -static constexpr int num_ordered_rows = 20000; -static constexpr int page_size_for_ordered_tests = 5000; - -namespace { -namespace testdata { -// ----- most numerics. scale by 100 so all values fit in a single byte - -template -std::enable_if_t && !std::is_same_v, - cudf::test::fixed_width_column_wrapper> -ascending() -{ - int start = std::is_signed_v ? -num_ordered_rows / 2 : 0; - auto elements = - cudf::detail::make_counting_transform_iterator(start, [](auto i) { return i / 100; }); - return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); -} - -template -std::enable_if_t && !std::is_same_v, - cudf::test::fixed_width_column_wrapper> -descending() -{ - if (std::is_signed_v) { - auto elements = cudf::detail::make_counting_transform_iterator(-num_ordered_rows / 2, - [](auto i) { return -i / 100; }); - return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); - } else { - auto elements = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return (num_ordered_rows - i) / 100; }); - return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); - } -} - -template -std::enable_if_t && !std::is_same_v, - cudf::test::fixed_width_column_wrapper> -unordered() -{ - if (std::is_signed_v) { - auto elements = cudf::detail::make_counting_transform_iterator( - -num_ordered_rows / 2, [](auto i) { return (i % 2 ? i : -i) / 100; }); - return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); - } else { - auto elements = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return (i % 2 ? 
i : num_ordered_rows - i) / 100; }); - return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); - } -} - -// ----- bool - -template -std::enable_if_t, cudf::test::fixed_width_column_wrapper> ascending() -{ - auto elements = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return i < num_ordered_rows / 2 ? false : true; }); - return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); -} - -template -std::enable_if_t, cudf::test::fixed_width_column_wrapper> descending() -{ - auto elements = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return i < num_ordered_rows / 2 ? true : false; }); - return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); -} - -template -std::enable_if_t, cudf::test::fixed_width_column_wrapper> unordered() -{ - auto elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { - switch (i / page_size_for_ordered_tests) { - case 0: return true; - case 1: return false; - case 2: return true; - default: return false; - } - }); - return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); -} - -// ----- fixed point types - -template -std::enable_if_t(), cudf::test::fixed_width_column_wrapper> ascending() -{ - auto elements = cudf::detail::make_counting_transform_iterator( - -num_ordered_rows / 2, [](auto i) { return T(i, numeric::scale_type{0}); }); - return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); -} - -template -std::enable_if_t(), cudf::test::fixed_width_column_wrapper> descending() -{ - auto elements = cudf::detail::make_counting_transform_iterator( - -num_ordered_rows / 2, [](auto i) { return T(-i, numeric::scale_type{0}); }); - return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); -} - -template -std::enable_if_t(), cudf::test::fixed_width_column_wrapper> unordered() -{ - auto elements = cudf::detail::make_counting_transform_iterator( - -num_ordered_rows / 2, [](auto i) { return T(i % 2 ? i : -i, numeric::scale_type{0}); }); - return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); -} - -// ----- chrono types -// ----- timstamp - -template -std::enable_if_t(), cudf::test::fixed_width_column_wrapper> ascending() -{ - auto elements = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return T(typename T::duration(i)); }); - return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); -} - -template -std::enable_if_t(), cudf::test::fixed_width_column_wrapper> descending() -{ - auto elements = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return T(typename T::duration(num_ordered_rows - i)); }); - return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); -} - -template -std::enable_if_t(), cudf::test::fixed_width_column_wrapper> unordered() -{ - auto elements = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return T(typename T::duration(i % 2 ? 
i : num_ordered_rows - i)); }); - return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); -} - -// ----- duration - -template -std::enable_if_t(), cudf::test::fixed_width_column_wrapper> ascending() -{ - auto elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return T(i); }); - return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); -} - -template -std::enable_if_t(), cudf::test::fixed_width_column_wrapper> descending() -{ - auto elements = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return T(num_ordered_rows - i); }); - return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); -} - -template -std::enable_if_t(), cudf::test::fixed_width_column_wrapper> unordered() -{ - auto elements = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return T(i % 2 ? i : num_ordered_rows - i); }); - return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); -} - -// ----- string_view - -template -std::enable_if_t, cudf::test::strings_column_wrapper> -ascending() -{ - char buf[10]; - auto elements = cudf::detail::make_counting_transform_iterator(0, [&buf](auto i) { - sprintf(buf, "%09d", i); - return std::string(buf); - }); - return cudf::test::strings_column_wrapper(elements, elements + num_ordered_rows); -} - -template -std::enable_if_t, cudf::test::strings_column_wrapper> -descending() -{ - char buf[10]; - auto elements = cudf::detail::make_counting_transform_iterator(0, [&buf](auto i) { - sprintf(buf, "%09d", num_ordered_rows - i); - return std::string(buf); - }); - return cudf::test::strings_column_wrapper(elements, elements + num_ordered_rows); -} - -template -std::enable_if_t, cudf::test::strings_column_wrapper> -unordered() -{ - char buf[10]; - auto elements = cudf::detail::make_counting_transform_iterator(0, [&buf](auto i) { - sprintf(buf, "%09d", (i % 2 == 0) ? 
i : (num_ordered_rows - i)); - return std::string(buf); - }); - return cudf::test::strings_column_wrapper(elements, elements + num_ordered_rows); -} - -} // namespace testdata -} // anonymous namespace - -TYPED_TEST(ParquetWriterComparableTypeTest, ThreeColumnSorted) -{ - using T = TypeParam; - - auto col0 = testdata::ascending(); - auto col1 = testdata::descending(); - auto col2 = testdata::unordered(); - - auto const expected = table_view{{col0, col1, col2}}; - - auto const filepath = temp_env->get_temp_filepath("ThreeColumnSorted.parquet"); - const cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .max_page_size_rows(page_size_for_ordered_tests) - .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN); - cudf::io::write_parquet(out_opts); - - auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::FileMetaData fmd; - - read_footer(source, &fmd); - CUDF_EXPECTS(fmd.row_groups.size() > 0, "No row groups found"); - - auto const& columns = fmd.row_groups[0].columns; - CUDF_EXPECTS(columns.size() == static_cast(expected.num_columns()), - "Invalid number of columns"); - - // now check that the boundary order for chunk 1 is ascending, - // chunk 2 is descending, and chunk 3 is unordered - cudf::io::parquet::BoundaryOrder expected_orders[] = { - cudf::io::parquet::BoundaryOrder::ASCENDING, - cudf::io::parquet::BoundaryOrder::DESCENDING, - cudf::io::parquet::BoundaryOrder::UNORDERED}; - - for (std::size_t i = 0; i < columns.size(); i++) { - auto const ci = read_column_index(source, columns[i]); - EXPECT_EQ(ci.boundary_order, expected_orders[i]); - } -} - -// utility functions for column index tests - -// compare two values. return -1 if v1 < v2, -// 0 if v1 == v2, and 1 if v1 > v2. -template -int32_t compare(T& v1, T& v2) -{ - return (v1 > v2) - (v1 < v2); -} - -// compare two binary statistics blobs based on their physical -// and converted types. returns -1 if v1 < v2, 0 if v1 == v2, and -// 1 if v1 > v2. 
-int32_t compare_binary(const std::vector& v1, - const std::vector& v2, - cudf::io::parquet::Type ptype, - cudf::io::parquet::ConvertedType ctype) -{ - switch (ptype) { - case cudf::io::parquet::INT32: - switch (ctype) { - case cudf::io::parquet::UINT_8: - case cudf::io::parquet::UINT_16: - case cudf::io::parquet::UINT_32: - return compare(*(reinterpret_cast(v1.data())), - *(reinterpret_cast(v2.data()))); - default: - return compare(*(reinterpret_cast(v1.data())), - *(reinterpret_cast(v2.data()))); - } - - case cudf::io::parquet::INT64: - if (ctype == cudf::io::parquet::UINT_64) { - return compare(*(reinterpret_cast(v1.data())), - *(reinterpret_cast(v2.data()))); - } - return compare(*(reinterpret_cast(v1.data())), - *(reinterpret_cast(v2.data()))); - - case cudf::io::parquet::FLOAT: - return compare(*(reinterpret_cast(v1.data())), - *(reinterpret_cast(v2.data()))); - - case cudf::io::parquet::DOUBLE: - return compare(*(reinterpret_cast(v1.data())), - *(reinterpret_cast(v2.data()))); - - case cudf::io::parquet::BYTE_ARRAY: { - int32_t v1sz = v1.size(); - int32_t v2sz = v2.size(); - int32_t ret = memcmp(v1.data(), v2.data(), std::min(v1sz, v2sz)); - if (ret != 0 or v1sz == v2sz) { return ret; } - return v1sz - v2sz; - } - - default: CUDF_FAIL("Invalid type in compare_binary"); - } - - return 0; -} - -TEST_F(ParquetWriterTest, CheckColumnOffsetIndex) -{ - constexpr auto num_rows = 100000; - - // fixed length strings - auto str1_elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { - char buf[30]; - sprintf(buf, "%012d", i); - return std::string(buf); - }); - auto col0 = cudf::test::strings_column_wrapper(str1_elements, str1_elements + num_rows); - - auto col1_data = random_values(num_rows); - auto col2_data = random_values(num_rows); - auto col3_data = random_values(num_rows); - auto col4_data = random_values(num_rows); - auto col5_data = random_values(num_rows); - auto col6_data = random_values(num_rows); - - auto col1 = cudf::test::fixed_width_column_wrapper(col1_data.begin(), col1_data.end()); - auto col2 = cudf::test::fixed_width_column_wrapper(col2_data.begin(), col2_data.end()); - auto col3 = cudf::test::fixed_width_column_wrapper(col3_data.begin(), col3_data.end()); - auto col4 = cudf::test::fixed_width_column_wrapper(col4_data.begin(), col4_data.end()); - auto col5 = cudf::test::fixed_width_column_wrapper(col5_data.begin(), col5_data.end()); - auto col6 = cudf::test::fixed_width_column_wrapper(col6_data.begin(), col6_data.end()); - - // mixed length strings - auto str2_elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { - char buf[30]; - sprintf(buf, "%d", i); - return std::string(buf); - }); - auto col7 = cudf::test::strings_column_wrapper(str2_elements, str2_elements + num_rows); - - auto const expected = table_view{{col0, col1, col2, col3, col4, col5, col6, col7}}; - - auto const filepath = temp_env->get_temp_filepath("CheckColumnOffsetIndex.parquet"); - const cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) - .max_page_size_rows(20000); - cudf::io::write_parquet(out_opts); - - auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::FileMetaData fmd; - - read_footer(source, &fmd); - - for (size_t r = 0; r < fmd.row_groups.size(); r++) { - auto const& rg = fmd.row_groups[r]; - for (size_t c = 0; c < rg.columns.size(); c++) { - auto const& chunk = rg.columns[c]; - - // loop over 
offsets, read each page header, make sure it's a data page and that - // the first row index is correct - auto const oi = read_offset_index(source, chunk); - - int64_t num_vals = 0; - for (size_t o = 0; o < oi.page_locations.size(); o++) { - auto const& page_loc = oi.page_locations[o]; - auto const ph = read_page_header(source, page_loc); - EXPECT_EQ(ph.type, cudf::io::parquet::PageType::DATA_PAGE); - EXPECT_EQ(page_loc.first_row_index, num_vals); - num_vals += ph.data_page_header.num_values; - } - - // loop over page stats from the column index. check that stats.min <= page.min - // and stats.max >= page.max for each page. - auto const ci = read_column_index(source, chunk); - auto const stats = parse_statistics(chunk); - - // schema indexing starts at 1 - auto const ptype = fmd.schema[c + 1].type; - auto const ctype = fmd.schema[c + 1].converted_type; - for (size_t p = 0; p < ci.min_values.size(); p++) { - // null_pages should always be false - EXPECT_FALSE(ci.null_pages[p]); - // null_counts should always be 0 - EXPECT_EQ(ci.null_counts[p], 0); - EXPECT_TRUE(compare_binary(stats.min_value, ci.min_values[p], ptype, ctype) <= 0); - } - for (size_t p = 0; p < ci.max_values.size(); p++) - EXPECT_TRUE(compare_binary(stats.max_value, ci.max_values[p], ptype, ctype) >= 0); - } - } -} - -TEST_F(ParquetWriterTest, CheckColumnOffsetIndexNulls) -{ - constexpr auto num_rows = 100000; - - // fixed length strings - auto str1_elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { - char buf[30]; - sprintf(buf, "%012d", i); - return std::string(buf); - }); - auto col0 = cudf::test::strings_column_wrapper(str1_elements, str1_elements + num_rows); - - auto col1_data = random_values(num_rows); - auto col2_data = random_values(num_rows); - auto col3_data = random_values(num_rows); - auto col4_data = random_values(num_rows); - auto col5_data = random_values(num_rows); - auto col6_data = random_values(num_rows); - - auto valids = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; }); - - // add null values for all but first column - auto col1 = - cudf::test::fixed_width_column_wrapper(col1_data.begin(), col1_data.end(), valids); - auto col2 = - cudf::test::fixed_width_column_wrapper(col2_data.begin(), col2_data.end(), valids); - auto col3 = - cudf::test::fixed_width_column_wrapper(col3_data.begin(), col3_data.end(), valids); - auto col4 = - cudf::test::fixed_width_column_wrapper(col4_data.begin(), col4_data.end(), valids); - auto col5 = - cudf::test::fixed_width_column_wrapper(col5_data.begin(), col5_data.end(), valids); - auto col6 = - cudf::test::fixed_width_column_wrapper(col6_data.begin(), col6_data.end(), valids); - - // mixed length strings - auto str2_elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { - char buf[30]; - sprintf(buf, "%d", i); - return std::string(buf); - }); - auto col7 = cudf::test::strings_column_wrapper(str2_elements, str2_elements + num_rows, valids); - - auto expected = table_view{{col0, col1, col2, col3, col4, col5, col6, col7}}; - - auto const filepath = temp_env->get_temp_filepath("CheckColumnOffsetIndexNulls.parquet"); - const cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) - .max_page_size_rows(20000); - cudf::io::write_parquet(out_opts); - - auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::FileMetaData fmd; - - read_footer(source, 
&fmd); - - for (size_t r = 0; r < fmd.row_groups.size(); r++) { - auto const& rg = fmd.row_groups[r]; - for (size_t c = 0; c < rg.columns.size(); c++) { - auto const& chunk = rg.columns[c]; - - // loop over offsets, read each page header, make sure it's a data page and that - // the first row index is correct - auto const oi = read_offset_index(source, chunk); - - int64_t num_vals = 0; - for (size_t o = 0; o < oi.page_locations.size(); o++) { - auto const& page_loc = oi.page_locations[o]; - auto const ph = read_page_header(source, page_loc); - EXPECT_EQ(ph.type, cudf::io::parquet::PageType::DATA_PAGE); - EXPECT_EQ(page_loc.first_row_index, num_vals); - num_vals += ph.data_page_header.num_values; - } - - // loop over page stats from the column index. check that stats.min <= page.min - // and stats.max >= page.max for each page. - auto const ci = read_column_index(source, chunk); - auto const stats = parse_statistics(chunk); - - // schema indexing starts at 1 - auto const ptype = fmd.schema[c + 1].type; - auto const ctype = fmd.schema[c + 1].converted_type; - for (size_t p = 0; p < ci.min_values.size(); p++) { - EXPECT_FALSE(ci.null_pages[p]); - if (c > 0) { // first column has no nulls - EXPECT_GT(ci.null_counts[p], 0); - } else { - EXPECT_EQ(ci.null_counts[p], 0); - } - EXPECT_TRUE(compare_binary(stats.min_value, ci.min_values[p], ptype, ctype) <= 0); - } - for (size_t p = 0; p < ci.max_values.size(); p++) { - EXPECT_TRUE(compare_binary(stats.max_value, ci.max_values[p], ptype, ctype) >= 0); - } - } - } -} - -TEST_F(ParquetWriterTest, CheckColumnOffsetIndexNullColumn) -{ - constexpr auto num_rows = 100000; - - // fixed length strings - auto str1_elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { - char buf[30]; - sprintf(buf, "%012d", i); - return std::string(buf); - }); - auto col0 = cudf::test::strings_column_wrapper(str1_elements, str1_elements + num_rows); - - auto col1_data = random_values(num_rows); - auto col2_data = random_values(num_rows); - - // col1 is all nulls - auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return false; }); - auto col1 = - cudf::test::fixed_width_column_wrapper(col1_data.begin(), col1_data.end(), valids); - auto col2 = cudf::test::fixed_width_column_wrapper(col2_data.begin(), col2_data.end()); - - // mixed length strings - auto str2_elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { - char buf[30]; - sprintf(buf, "%d", i); - return std::string(buf); - }); - auto col3 = cudf::test::strings_column_wrapper(str2_elements, str2_elements + num_rows); - - auto expected = table_view{{col0, col1, col2, col3}}; - - auto const filepath = temp_env->get_temp_filepath("CheckColumnOffsetIndexNullColumn.parquet"); - const cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) - .max_page_size_rows(20000); - cudf::io::write_parquet(out_opts); - - auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::FileMetaData fmd; - - read_footer(source, &fmd); - - for (size_t r = 0; r < fmd.row_groups.size(); r++) { - auto const& rg = fmd.row_groups[r]; - for (size_t c = 0; c < rg.columns.size(); c++) { - auto const& chunk = rg.columns[c]; - - // loop over offsets, read each page header, make sure it's a data page and that - // the first row index is correct - auto const oi = read_offset_index(source, chunk); - - int64_t num_vals = 0; - for (size_t o = 
0; o < oi.page_locations.size(); o++) { - auto const& page_loc = oi.page_locations[o]; - auto const ph = read_page_header(source, page_loc); - EXPECT_EQ(ph.type, cudf::io::parquet::PageType::DATA_PAGE); - EXPECT_EQ(page_loc.first_row_index, num_vals); - num_vals += ph.data_page_header.num_values; - } - - // loop over page stats from the column index. check that stats.min <= page.min - // and stats.max >= page.max for each non-empty page. - auto const ci = read_column_index(source, chunk); - auto const stats = parse_statistics(chunk); - - // schema indexing starts at 1 - auto const ptype = fmd.schema[c + 1].type; - auto const ctype = fmd.schema[c + 1].converted_type; - for (size_t p = 0; p < ci.min_values.size(); p++) { - // check tnat null_pages is true for column 1 - if (c == 1) { - EXPECT_TRUE(ci.null_pages[p]); - EXPECT_GT(ci.null_counts[p], 0); - } - if (not ci.null_pages[p]) { - EXPECT_EQ(ci.null_counts[p], 0); - EXPECT_TRUE(compare_binary(stats.min_value, ci.min_values[p], ptype, ctype) <= 0); - } - } - for (size_t p = 0; p < ci.max_values.size(); p++) { - if (not ci.null_pages[p]) { - EXPECT_TRUE(compare_binary(stats.max_value, ci.max_values[p], ptype, ctype) >= 0); - } - } - } - } -} - -TEST_F(ParquetWriterTest, CheckColumnOffsetIndexStruct) -{ - auto c0 = testdata::ascending(); - - auto sc0 = testdata::ascending(); - auto sc1 = testdata::descending(); - auto sc2 = testdata::unordered(); - - std::vector> struct_children; - struct_children.push_back(sc0.release()); - struct_children.push_back(sc1.release()); - struct_children.push_back(sc2.release()); - cudf::test::structs_column_wrapper c1(std::move(struct_children)); - - auto listgen = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return i % 2 == 0 ? i / 2 : num_ordered_rows - (i / 2); }); - auto list = - cudf::test::fixed_width_column_wrapper(listgen, listgen + 2 * num_ordered_rows); - auto offgen = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i * 2; }); - auto offsets = - cudf::test::fixed_width_column_wrapper(offgen, offgen + num_ordered_rows + 1); - - auto c2 = cudf::make_lists_column(num_ordered_rows, offsets.release(), list.release(), 0, {}); - - table_view expected({c0, c1, *c2}); - - auto const filepath = temp_env->get_temp_filepath("CheckColumnOffsetIndexStruct.parquet"); - const cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) - .max_page_size_rows(page_size_for_ordered_tests); - cudf::io::write_parquet(out_opts); - - auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::FileMetaData fmd; - - read_footer(source, &fmd); - - // hard coded schema indices. 
- // TODO find a way to do this without magic - size_t colidxs[] = {1, 3, 4, 5, 8}; - for (size_t r = 0; r < fmd.row_groups.size(); r++) { - auto const& rg = fmd.row_groups[r]; - for (size_t c = 0; c < rg.columns.size(); c++) { - size_t colidx = colidxs[c]; - auto const& chunk = rg.columns[c]; - - // loop over offsets, read each page header, make sure it's a data page and that - // the first row index is correct - auto const oi = read_offset_index(source, chunk); - - int64_t num_vals = 0; - for (size_t o = 0; o < oi.page_locations.size(); o++) { - auto const& page_loc = oi.page_locations[o]; - auto const ph = read_page_header(source, page_loc); - EXPECT_EQ(ph.type, cudf::io::parquet::PageType::DATA_PAGE); - // last column has 2 values per row - EXPECT_EQ(page_loc.first_row_index * (c == rg.columns.size() - 1 ? 2 : 1), num_vals); - num_vals += ph.data_page_header.num_values; - } - - // loop over page stats from the column index. check that stats.min <= page.min - // and stats.max >= page.max for each page. - auto const ci = read_column_index(source, chunk); - auto const stats = parse_statistics(chunk); - - auto const ptype = fmd.schema[colidx].type; - auto const ctype = fmd.schema[colidx].converted_type; - for (size_t p = 0; p < ci.min_values.size(); p++) { - EXPECT_TRUE(compare_binary(stats.min_value, ci.min_values[p], ptype, ctype) <= 0); - } - for (size_t p = 0; p < ci.max_values.size(); p++) { - EXPECT_TRUE(compare_binary(stats.max_value, ci.max_values[p], ptype, ctype) >= 0); - } - } - } -} - -TEST_F(ParquetWriterTest, CheckColumnIndexTruncation) -{ - const char* coldata[] = { - // in-range 7 bit. should truncate to "yyyyyyyz" - "yyyyyyyyy", - // max 7 bit. should truncate to "x7fx7fx7fx7fx7fx7fx7fx80", since it's - // considered binary, not UTF-8. If UTF-8 it should not truncate. - "\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f", - // max binary. this should not truncate - "\xff\xff\xff\xff\xff\xff\xff\xff\xff", - // in-range 2-byte UTF8 (U+00E9). should truncate to "éééê" - "ééééé", - // max 2-byte UTF8 (U+07FF). should not truncate - "߿߿߿߿߿", - // in-range 3-byte UTF8 (U+0800). should truncate to "ࠀࠁ" - "ࠀࠀࠀ", - // max 3-byte UTF8 (U+FFFF). should not truncate - "\xef\xbf\xbf\xef\xbf\xbf\xef\xbf\xbf", - // in-range 4-byte UTF8 (U+10000). should truncate to "𐀀𐀁" - "𐀀𐀀𐀀", - // max unicode (U+10FFFF). should truncate to \xf4\x8f\xbf\xbf\xf4\x90\x80\x80, - // which is no longer valid unicode, but is still ok UTF-8??? - "\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbf", - // max 4-byte UTF8 (U+1FFFFF). should not truncate - "\xf7\xbf\xbf\xbf\xf7\xbf\xbf\xbf\xf7\xbf\xbf\xbf"}; - - // NOTE: UTF8 min is initialized with 0xf7bfbfbf. Binary values larger - // than that will not become minimum value (when written as UTF-8). - const char* truncated_min[] = {"yyyyyyyy", - "\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f", - "\xf7\xbf\xbf\xbf", - "éééé", - "߿߿߿߿", - "ࠀࠀ", - "\xef\xbf\xbf\xef\xbf\xbf", - "𐀀𐀀", - "\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbf", - "\xf7\xbf\xbf\xbf"}; - - const char* truncated_max[] = {"yyyyyyyz", - "\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x80", - "\xff\xff\xff\xff\xff\xff\xff\xff\xff", - "éééê", - "߿߿߿߿߿", - "ࠀࠁ", - "\xef\xbf\xbf\xef\xbf\xbf\xef\xbf\xbf", - "𐀀𐀁", - "\xf4\x8f\xbf\xbf\xf4\x90\x80\x80", - "\xf7\xbf\xbf\xbf\xf7\xbf\xbf\xbf\xf7\xbf\xbf\xbf"}; - - auto cols = [&]() { - using string_wrapper = column_wrapper; - std::vector> cols; - for (auto const str : coldata) { - cols.push_back(string_wrapper{str}.release()); - } - return cols; - }(); - auto expected = std::make_unique
(std::move(cols)); - - auto const filepath = temp_env->get_temp_filepath("CheckColumnIndexTruncation.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected->view()) - .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) - .column_index_truncate_length(8); - cudf::io::write_parquet(out_opts); - - auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::FileMetaData fmd; - - read_footer(source, &fmd); - - for (size_t r = 0; r < fmd.row_groups.size(); r++) { - auto const& rg = fmd.row_groups[r]; - for (size_t c = 0; c < rg.columns.size(); c++) { - auto const& chunk = rg.columns[c]; - - auto const ci = read_column_index(source, chunk); - auto const stats = parse_statistics(chunk); - - // check trunc(page.min) <= stats.min && trun(page.max) >= stats.max - auto const ptype = fmd.schema[c + 1].type; - auto const ctype = fmd.schema[c + 1].converted_type; - EXPECT_TRUE(compare_binary(ci.min_values[0], stats.min_value, ptype, ctype) <= 0); - EXPECT_TRUE(compare_binary(ci.max_values[0], stats.max_value, ptype, ctype) >= 0); - - // check that truncated values == expected - EXPECT_EQ(memcmp(ci.min_values[0].data(), truncated_min[c], ci.min_values[0].size()), 0); - EXPECT_EQ(memcmp(ci.max_values[0].data(), truncated_max[c], ci.max_values[0].size()), 0); - } - } -} - -TEST_F(ParquetWriterTest, BinaryColumnIndexTruncation) -{ - std::vector truncated_min[] = {{0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe}, - {0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, - {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}; - - std::vector truncated_max[] = {{0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xff}, - {0xff}, - {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}; - - cudf::test::lists_column_wrapper col0{ - {0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe}}; - cudf::test::lists_column_wrapper col1{ - {0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}; - cudf::test::lists_column_wrapper col2{ - {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}; - - auto expected = table_view{{col0, col1, col2}}; - - cudf::io::table_input_metadata output_metadata(expected); - output_metadata.column_metadata[0].set_name("col_binary0").set_output_as_binary(true); - output_metadata.column_metadata[1].set_name("col_binary1").set_output_as_binary(true); - output_metadata.column_metadata[2].set_name("col_binary2").set_output_as_binary(true); - - auto const filepath = temp_env->get_temp_filepath("BinaryColumnIndexTruncation.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .metadata(&output_metadata) - .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) - .column_index_truncate_length(8); - cudf::io::write_parquet(out_opts); - - auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::FileMetaData fmd; - - read_footer(source, &fmd); - - for (size_t r = 0; r < fmd.row_groups.size(); r++) { - auto const& rg = fmd.row_groups[r]; - for (size_t c = 0; c < rg.columns.size(); c++) { - auto const& chunk = rg.columns[c]; - - auto const ci = read_column_index(source, chunk); - auto const stats = parse_statistics(chunk); - - // check trunc(page.min) <= stats.min && trun(page.max) >= stats.max - auto const ptype = fmd.schema[c + 1].type; - auto const ctype = fmd.schema[c + 1].converted_type; - EXPECT_TRUE(compare_binary(ci.min_values[0], stats.min_value, ptype, ctype) <= 0); - 
EXPECT_TRUE(compare_binary(ci.max_values[0], stats.max_value, ptype, ctype) >= 0); - - // check that truncated values == expected - EXPECT_EQ(ci.min_values[0], truncated_min[c]); - EXPECT_EQ(ci.max_values[0], truncated_max[c]); - } - } -} - -TEST_F(ParquetReaderTest, EmptyColumnsParam) -{ - srand(31337); - auto const expected = create_random_fixed_table(2, 4, false); - - std::vector out_buffer; - cudf::io::parquet_writer_options args = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&out_buffer}, *expected); - cudf::io::write_parquet(args); - - cudf::io::parquet_reader_options read_opts = - cudf::io::parquet_reader_options::builder( - cudf::io::source_info{out_buffer.data(), out_buffer.size()}) - .columns({}); - auto const result = cudf::io::read_parquet(read_opts); - - EXPECT_EQ(result.tbl->num_columns(), 0); - EXPECT_EQ(result.tbl->num_rows(), 0); -} - -TEST_F(ParquetReaderTest, BinaryAsStrings) -{ - std::vector strings{ - "Monday", "Wednesday", "Friday", "Monday", "Friday", "Friday", "Friday", "Funday"}; - const auto num_rows = strings.size(); - - auto seq_col0 = random_values(num_rows); - auto seq_col2 = random_values(num_rows); - auto seq_col3 = random_values(num_rows); - auto validity = cudf::test::iterators::no_nulls(); - - column_wrapper int_col{seq_col0.begin(), seq_col0.end(), validity}; - column_wrapper string_col{strings.begin(), strings.end()}; - column_wrapper float_col{seq_col2.begin(), seq_col2.end(), validity}; - cudf::test::lists_column_wrapper list_int_col{ - {'M', 'o', 'n', 'd', 'a', 'y'}, - {'W', 'e', 'd', 'n', 'e', 's', 'd', 'a', 'y'}, - {'F', 'r', 'i', 'd', 'a', 'y'}, - {'M', 'o', 'n', 'd', 'a', 'y'}, - {'F', 'r', 'i', 'd', 'a', 'y'}, - {'F', 'r', 'i', 'd', 'a', 'y'}, - {'F', 'r', 'i', 'd', 'a', 'y'}, - {'F', 'u', 'n', 'd', 'a', 'y'}}; - - auto output = table_view{{int_col, string_col, float_col, string_col, list_int_col}}; - cudf::io::table_input_metadata output_metadata(output); - output_metadata.column_metadata[0].set_name("col_other"); - output_metadata.column_metadata[1].set_name("col_string"); - output_metadata.column_metadata[2].set_name("col_float"); - output_metadata.column_metadata[3].set_name("col_string2").set_output_as_binary(true); - output_metadata.column_metadata[4].set_name("col_binary").set_output_as_binary(true); - - auto filepath = temp_env->get_temp_filepath("BinaryReadStrings.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, output) - .metadata(&output_metadata); - cudf::io::write_parquet(out_opts); - - auto expected_string = table_view{{int_col, string_col, float_col, string_col, string_col}}; - auto expected_mixed = table_view{{int_col, string_col, float_col, list_int_col, list_int_col}}; - - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) - .set_column_schema({{}, {}, {}, {}, {}}); - auto result = cudf::io::read_parquet(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected_string, result.tbl->view()); - - cudf::io::parquet_reader_options default_in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - result = cudf::io::read_parquet(default_in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected_string, result.tbl->view()); - - std::vector md{ - {}, - {}, - {}, - cudf::io::reader_column_schema().set_convert_binary_to_strings(false), - cudf::io::reader_column_schema().set_convert_binary_to_strings(false)}; - - cudf::io::parquet_reader_options 
mixed_in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) - .set_column_schema(md); - result = cudf::io::read_parquet(mixed_in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected_mixed, result.tbl->view()); -} - -TEST_F(ParquetReaderTest, NestedByteArray) -{ - constexpr auto num_rows = 8; - - auto seq_col0 = random_values(num_rows); - auto seq_col2 = random_values(num_rows); - auto seq_col3 = random_values(num_rows); - auto const validity = cudf::test::iterators::no_nulls(); - - column_wrapper int_col{seq_col0.begin(), seq_col0.end(), validity}; - column_wrapper float_col{seq_col2.begin(), seq_col2.end(), validity}; - cudf::test::lists_column_wrapper list_list_int_col{ - {{'M', 'o', 'n', 'd', 'a', 'y'}, - {'W', 'e', 'd', 'n', 'e', 's', 'd', 'a', 'y'}, - {'F', 'r', 'i', 'd', 'a', 'y'}}, - {{'M', 'o', 'n', 'd', 'a', 'y'}, {'F', 'r', 'i', 'd', 'a', 'y'}}, - {{'M', 'o', 'n', 'd', 'a', 'y'}, - {'W', 'e', 'd', 'n', 'e', 's', 'd', 'a', 'y'}, - {'F', 'r', 'i', 'd', 'a', 'y'}}, - {{'F', 'r', 'i', 'd', 'a', 'y'}, - {'F', 'r', 'i', 'd', 'a', 'y'}, - {'F', 'u', 'n', 'd', 'a', 'y'}}, - {{'M', 'o', 'n', 'd', 'a', 'y'}, - {'W', 'e', 'd', 'n', 'e', 's', 'd', 'a', 'y'}, - {'F', 'r', 'i', 'd', 'a', 'y'}}, - {{'F', 'r', 'i', 'd', 'a', 'y'}, - {'F', 'r', 'i', 'd', 'a', 'y'}, - {'F', 'u', 'n', 'd', 'a', 'y'}}, - {{'M', 'o', 'n', 'd', 'a', 'y'}, - {'W', 'e', 'd', 'n', 'e', 's', 'd', 'a', 'y'}, - {'F', 'r', 'i', 'd', 'a', 'y'}}, - {{'M', 'o', 'n', 'd', 'a', 'y'}, {'F', 'r', 'i', 'd', 'a', 'y'}}}; - - auto const expected = table_view{{int_col, float_col, list_list_int_col}}; - cudf::io::table_input_metadata output_metadata(expected); - output_metadata.column_metadata[0].set_name("col_other"); - output_metadata.column_metadata[1].set_name("col_float"); - output_metadata.column_metadata[2].set_name("col_binary").child(1).set_output_as_binary(true); - - auto filepath = temp_env->get_temp_filepath("NestedByteArray.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .metadata(&output_metadata); - cudf::io::write_parquet(out_opts); - - auto source = cudf::io::datasource::create(filepath); - cudf::io::parquet::FileMetaData fmd; - - read_footer(source, &fmd); - EXPECT_EQ(fmd.schema[5].type, cudf::io::parquet::Type::BYTE_ARRAY); - - std::vector md{ - {}, - {}, - cudf::io::reader_column_schema().add_child( - cudf::io::reader_column_schema().set_convert_binary_to_strings(false))}; - - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) - .set_column_schema(md); - auto result = cudf::io::read_parquet(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); -} - -TEST_F(ParquetWriterTest, ByteArrayStats) -{ - // check that byte array min and max statistics are written as expected. If a byte array is - // written as a string, max utf8 is 0xf7bfbfbf and so the minimum value will be set to that value - // instead of a potential minimum higher than that. 
- std::vector expected_col0_min{0xf0}; - std::vector expected_col0_max{0xf0, 0xf5, 0xf5}; - std::vector expected_col1_min{0xfe, 0xfe, 0xfe}; - std::vector expected_col1_max{0xfe, 0xfe, 0xfe}; - - cudf::test::lists_column_wrapper list_int_col0{ - {0xf0}, {0xf0, 0xf5, 0xf3}, {0xf0, 0xf5, 0xf5}}; - cudf::test::lists_column_wrapper list_int_col1{ - {0xfe, 0xfe, 0xfe}, {0xfe, 0xfe, 0xfe}, {0xfe, 0xfe, 0xfe}}; - - auto expected = table_view{{list_int_col0, list_int_col1}}; - cudf::io::table_input_metadata output_metadata(expected); - output_metadata.column_metadata[0].set_name("col_binary0").set_output_as_binary(true); - output_metadata.column_metadata[1].set_name("col_binary1").set_output_as_binary(true); - - auto filepath = temp_env->get_temp_filepath("ByteArrayStats.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .metadata(&output_metadata); - cudf::io::write_parquet(out_opts); - - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) - .set_column_schema({{}, {}}); - auto result = cudf::io::read_parquet(in_opts); - - auto source = cudf::io::datasource::create(filepath); - cudf::io::parquet::FileMetaData fmd; - - read_footer(source, &fmd); - - EXPECT_EQ(fmd.schema[1].type, cudf::io::parquet::Type::BYTE_ARRAY); - EXPECT_EQ(fmd.schema[2].type, cudf::io::parquet::Type::BYTE_ARRAY); - - auto const stats0 = parse_statistics(fmd.row_groups[0].columns[0]); - auto const stats1 = parse_statistics(fmd.row_groups[0].columns[1]); - - EXPECT_EQ(expected_col0_min, stats0.min_value); - EXPECT_EQ(expected_col0_max, stats0.max_value); - EXPECT_EQ(expected_col1_min, stats1.min_value); - EXPECT_EQ(expected_col1_max, stats1.max_value); -} - -TEST_F(ParquetReaderTest, StructByteArray) -{ - constexpr auto num_rows = 100; - - auto seq_col0 = random_values(num_rows); - auto const validity = cudf::test::iterators::no_nulls(); - - column_wrapper int_col{seq_col0.begin(), seq_col0.end(), validity}; - cudf::test::lists_column_wrapper list_of_int{{seq_col0.begin(), seq_col0.begin() + 50}, - {seq_col0.begin() + 50, seq_col0.end()}}; - auto struct_col = cudf::test::structs_column_wrapper{{list_of_int}, validity}; - - auto const expected = table_view{{struct_col}}; - EXPECT_EQ(1, expected.num_columns()); - cudf::io::table_input_metadata output_metadata(expected); - output_metadata.column_metadata[0] - .set_name("struct_binary") - .child(0) - .set_name("a") - .set_output_as_binary(true); - - auto filepath = temp_env->get_temp_filepath("StructByteArray.parquet"); - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .metadata(&output_metadata); - cudf::io::write_parquet(out_opts); - - std::vector md{cudf::io::reader_column_schema().add_child( - cudf::io::reader_column_schema().set_convert_binary_to_strings(false))}; - - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) - .set_column_schema(md); - auto result = cudf::io::read_parquet(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); -} - -TEST_F(ParquetWriterTest, SingleValueDictionaryTest) -{ - constexpr unsigned int expected_bits = 1; - constexpr unsigned int nrows = 1'000'000U; - - auto elements = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return "a unique string value suffixed with 1"; }); - auto const col0 = 
cudf::test::strings_column_wrapper(elements, elements + nrows); - auto const expected = table_view{{col0}}; - - auto const filepath = temp_env->get_temp_filepath("SingleValueDictionaryTest.parquet"); - // set row group size so that there will be only one row group - // no compression so we can easily read page data - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .compression(cudf::io::compression_type::NONE) - .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) - .row_group_size_rows(nrows); - cudf::io::write_parquet(out_opts); - - cudf::io::parquet_reader_options default_in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto const result = cudf::io::read_parquet(default_in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); - - // make sure dictionary was used - auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::FileMetaData fmd; - - read_footer(source, &fmd); - auto used_dict = [&fmd]() { - for (auto enc : fmd.row_groups[0].columns[0].meta_data.encodings) { - if (enc == cudf::io::parquet::Encoding::PLAIN_DICTIONARY or - enc == cudf::io::parquet::Encoding::RLE_DICTIONARY) { - return true; - } - } - return false; - }; - EXPECT_TRUE(used_dict()); - - // and check that the correct number of bits was used - auto const oi = read_offset_index(source, fmd.row_groups[0].columns[0]); - auto const nbits = read_dict_bits(source, oi.page_locations[0]); - EXPECT_EQ(nbits, expected_bits); -} - -TEST_P(ParquetSizedTest, DictionaryTest) -{ - const unsigned int cardinality = (1 << (GetParam() - 1)) + 1; - const unsigned int nrows = std::max(cardinality * 3 / 2, 3'000'000U); - - auto elements = cudf::detail::make_counting_transform_iterator(0, [cardinality](auto i) { - return "a unique string value suffixed with " + std::to_string(i % cardinality); - }); - auto const col0 = cudf::test::strings_column_wrapper(elements, elements + nrows); - auto const expected = table_view{{col0}}; - - auto const filepath = temp_env->get_temp_filepath("DictionaryTest.parquet"); - // set row group size so that there will be only one row group - // no compression so we can easily read page data - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) - .compression(cudf::io::compression_type::NONE) - .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) - .row_group_size_rows(nrows) - .row_group_size_bytes(512 * 1024 * 1024); - cudf::io::write_parquet(out_opts); - - cudf::io::parquet_reader_options default_in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto const result = cudf::io::read_parquet(default_in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); - - // make sure dictionary was used - auto const source = cudf::io::datasource::create(filepath); - cudf::io::parquet::FileMetaData fmd; - - read_footer(source, &fmd); - auto used_dict = [&fmd]() { - for (auto enc : fmd.row_groups[0].columns[0].meta_data.encodings) { - if (enc == cudf::io::parquet::Encoding::PLAIN_DICTIONARY or - enc == cudf::io::parquet::Encoding::RLE_DICTIONARY) { - return true; - } - } - return false; - }; - EXPECT_TRUE(used_dict()); - - // and check that the correct number of bits was used - auto const oi = read_offset_index(source, fmd.row_groups[0].columns[0]); - auto const nbits = read_dict_bits(source, oi.page_locations[0]); - 
EXPECT_EQ(nbits, GetParam()); -} - -CUDF_TEST_PROGRAM_MAIN() From b250d6fc284d129dc5af7792bbe3a516973fd482 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 11 Oct 2022 13:24:58 -0700 Subject: [PATCH 022/162] Cleanup Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_preprocess.cu | 33 ++----------------------- 1 file changed, 2 insertions(+), 31 deletions(-) diff --git a/cpp/src/io/parquet/reader_preprocess.cu b/cpp/src/io/parquet/reader_preprocess.cu index d316fee9f07..111ca04502e 100644 --- a/cpp/src/io/parquet/reader_preprocess.cu +++ b/cpp/src/io/parquet/reader_preprocess.cu @@ -26,10 +26,7 @@ #include #include -namespace cudf { -namespace io { -namespace detail { -namespace parquet { +namespace cudf::io::detail::parquet { // Import functionality that's independent of legacy code using namespace cudf::io::parquet; @@ -589,30 +586,4 @@ void reader::impl::allocate_columns(hostdevice_vector& chu } } -/* -{ - std::mt19937 gen(6542); - std::bernoulli_distribution bn(0.7f); - //auto valids = -// cudf::detail::make_counting_transform_iterator(0, [&](int index) { return bn(gen); }); - auto values = thrust::make_counting_iterator(0); - - constexpr size_type num_rows = 40000; - cudf::test::fixed_width_column_wrapper a(values, values + num_rows); - cudf::test::fixed_width_column_wrapper b(values, values + num_rows); - - cudf::table_view t({a, b}); - cudf::io::parquet_writer_options opts = -cudf::io::parquet_writer_options::builder(cudf::io::sink_info{"parquet/tmp/chunked_splits.parquet"}, -t); cudf::io::write_parquet(opts); - - cudf::io::parquet_reader_options in_opts = -cudf::io::parquet_reader_options::builder(cudf::io::source_info{"parquet/tmp/chunked_splits.parquet"}); - auto result = cudf::io::read_parquet(in_opts); -} -*/ - -} // namespace parquet -} // namespace detail -} // namespace io -} // namespace cudf +} // namespace cudf::io::detail::parquet From e7a9e3eb76e2230548f65c5fe2cc37396d07333d Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 11 Oct 2022 15:00:17 -0700 Subject: [PATCH 023/162] Modify docs Signed-off-by: Nghia Truong --- cpp/src/io/parquet/parquet_gpu.hpp | 1 + cpp/src/io/parquet/reader_impl.hpp | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index a79ef046bb5..2680004af3b 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -236,6 +236,7 @@ struct ColumnChunkDesc { int32_t src_col_schema; // my schema index in the file }; +// TODO: rename struct chunked_intermediate_data { rmm::device_uvector page_keys; rmm::device_uvector page_index; diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index 04827c16994..8e7ff9733e1 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -150,6 +150,8 @@ class reader::impl { /** * @brief Preprocess column information and allocate output buffers. * + * TODO + * * There are several pieces of information we can't compute directly from row counts in * the parquet headers when dealing with nested schemas. 
* - The total sizes of all output columns at all nesting levels @@ -173,6 +175,16 @@ class reader::impl { bool uses_custom_row_bounds, size_type chunked_read_size); + /** + * TODO + * @brief allocate_columns + * @param chunks + * @param pages + * @param id + * @param min_row + * @param total_rows + * @param uses_custom_row_bounds + */ void allocate_columns(hostdevice_vector& chunks, hostdevice_vector& pages, gpu::chunked_intermediate_data const& id, From 811354a7efb880c3fcc4e333ecdc2e423d4f2490 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 11 Oct 2022 15:00:20 -0700 Subject: [PATCH 024/162] Cleanup Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.cu | 58 ------------------------------- 1 file changed, 58 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index e0962520570..ed30dc96be8 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1348,64 +1348,6 @@ void reader::impl::allocate_nesting_info(hostdevice_vector page_nesting_info.host_to_device(_stream); } -/** - * @copydoc cudf::io::detail::parquet::preprocess_columns - */ -/* -void reader::impl::preprocess_columns(hostdevice_vector& chunks, - hostdevice_vector& pages, - size_t min_row, - size_t total_rows, - bool uses_custom_row_bounds, - size_type chunked_read_size) -{ - // iterate over all input columns and allocate any associated output - // buffers if they are not part of a list hierarchy. mark down - // if we have any list columns that need further processing. - bool has_lists = false; - for (size_t idx = 0; idx < _input_columns.size(); idx++) { - auto const& input_col = _input_columns[idx]; - size_t max_depth = input_col.nesting_depth(); - - auto* cols = &_output_columns; - for (size_t l_idx = 0; l_idx < input_col.nesting_depth(); l_idx++) { - auto& out_buf = (*cols)[input_col.nesting[l_idx]]; - cols = &out_buf.children; - - // if this has a list parent, we will have to do further work in gpu::PreprocessColumnData - // to know how big this buffer actually is. - if (out_buf.user_data & PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT) { - has_lists = true; - } - // if we haven't already processed this column because it is part of a struct hierarchy - else if (out_buf.size == 0) { - // add 1 for the offset if this is a list column - out_buf.create( - out_buf.type.id() == type_id::LIST && l_idx < max_depth ? total_rows + 1 : total_rows, - _stream, - _mr); - } - } - } - - // if we have columns containing lists, or if we're doing chunked reads, - // further preprocessing is necessary. 
- if (has_lists || chunked_read_size > 0) { - gpu::PreprocessColumnData(pages, - chunks, - _input_columns, - _output_columns, - total_rows, - min_row, - uses_custom_row_bounds, - chunked_read_size, - _stream, - _mr); - _stream.synchronize(); - } -} -*/ - /** * @copydoc cudf::io::detail::parquet::decode_page_data */ From 12ba72ed6f3c8427ba8350ee984b6e7b92667798 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 11 Oct 2022 15:25:38 -0700 Subject: [PATCH 025/162] Add TODO Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index 8e7ff9733e1..03b670a8e73 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -84,6 +84,8 @@ class reader::impl { private: /** + * TODO: Rename this into something more meaningful + * * @brief Reads compressed page data to device memory * * @param page_data Buffers to hold compressed page data for each chunk From 45668ff61864abdd67f58c65b6e1d3a83e8612db Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 11 Oct 2022 16:10:20 -0700 Subject: [PATCH 026/162] Add `read_intermediate_data` Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.cu | 118 +++++++++++++++++++++++++++++ cpp/src/io/parquet/reader_impl.hpp | 11 +++ 2 files changed, 129 insertions(+) diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index ed30dc96be8..06be34ccc8d 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1568,6 +1568,124 @@ reader::impl::impl(std::vector>&& sources, _timestamp_type.id()); } +std::pair, hostdevice_vector> +reader::impl::read_intermediate_data(size_type skip_rows, + size_type num_rows, + const std::vector>& row_group_list) +{ + // Select only row groups required + const auto selected_row_groups = + _metadata->select_row_groups(row_group_list, skip_rows, num_rows); + + // TODO: fix this + if (selected_row_groups.size() != 0 && _input_columns.size() != 0) { + return {hostdevice_vector(0, 0, _stream), + hostdevice_vector(0, 0, _stream)}; + } + + // Descriptors for all the chunks that make up the selected columns + const auto num_input_columns = _input_columns.size(); + const auto num_chunks = selected_row_groups.size() * num_input_columns; + hostdevice_vector chunks(0, num_chunks, _stream); + + // Association between each column chunk and its source + std::vector chunk_source_map(num_chunks); + + // Tracker for eventually deallocating compressed and uncompressed data + std::vector> page_data(num_chunks); + + // Keep track of column chunk file offsets + std::vector column_chunk_offsets(num_chunks); + + // Initialize column chunk information + size_t total_decompressed_size = 0; + auto remaining_rows = num_rows; + std::vector> read_rowgroup_tasks; + for (const auto& rg : selected_row_groups) { + const auto& row_group = _metadata->get_row_group(rg.index, rg.source_index); + auto const row_group_start = rg.start_row; + auto const row_group_source = rg.source_index; + auto const row_group_rows = std::min(remaining_rows, row_group.num_rows); + auto const io_chunk_idx = chunks.size(); + + // generate ColumnChunkDesc objects for everything to be decoded (all input columns) + for (size_t i = 0; i < num_input_columns; ++i) { + auto col = _input_columns[i]; + // look up metadata + auto& col_meta = _metadata->get_column_metadata(rg.index, rg.source_index, col.schema_idx); + auto& schema = _metadata->get_schema(col.schema_idx); 
+ + auto [type_width, clock_rate, converted_type] = + conversion_info(to_type_id(schema, _strings_to_categorical, _timestamp_type.id()), + _timestamp_type.id(), + schema.type, + schema.converted_type, + schema.type_length); + + column_chunk_offsets[chunks.size()] = + (col_meta.dictionary_page_offset != 0) + ? std::min(col_meta.data_page_offset, col_meta.dictionary_page_offset) + : col_meta.data_page_offset; + + chunks.push_back(gpu::ColumnChunkDesc(col_meta.total_compressed_size, + nullptr, + col_meta.num_values, + schema.type, + type_width, + row_group_start, + row_group_rows, + schema.max_definition_level, + schema.max_repetition_level, + _metadata->get_output_nesting_depth(col.schema_idx), + required_bits(schema.max_definition_level), + required_bits(schema.max_repetition_level), + col_meta.codec, + converted_type, + schema.logical_type, + schema.decimal_scale, + clock_rate, + i, + col.schema_idx)); + + // Map each column chunk to its column index and its source index + chunk_source_map[chunks.size() - 1] = row_group_source; + + if (col_meta.codec != Compression::UNCOMPRESSED) { + total_decompressed_size += col_meta.total_uncompressed_size; + } + } + // Read compressed chunk data to device memory + read_rowgroup_tasks.push_back(read_column_chunks( + page_data, chunks, io_chunk_idx, chunks.size(), column_chunk_offsets, chunk_source_map)); + + remaining_rows -= row_group.num_rows; + } + for (auto& task : read_rowgroup_tasks) { + task.wait(); + } + assert(remaining_rows <= 0); + + // Process dataset chunk pages into output columns + const auto total_pages = count_page_headers(chunks); + hostdevice_vector pages(total_pages, total_pages, _stream); + + if (total_pages > 0) { + rmm::device_buffer decomp_page_data; + + // decoding of column/page information + decode_page_headers(chunks, pages); + if (total_decompressed_size > 0) { + decomp_page_data = decompress_page_data(chunks, pages); + // Free compressed data + for (size_t c = 0; c < chunks.size(); c++) { + if (chunks[c].codec != parquet::Compression::UNCOMPRESSED) { page_data[c].reset(); } + } + } + } + + return {std::move(chunks), std::move(pages)}; +} + table_with_metadata reader::impl::read(size_type skip_rows, size_type num_rows, bool uses_custom_row_bounds, diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index 03b670a8e73..faf5b8a48d3 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -83,6 +83,17 @@ class reader::impl { std::vector> const& row_group_indices); private: + /** + * TODO + * + * @brief load_column_chunk_descriotions + * @return + */ + std::pair, hostdevice_vector> + read_intermediate_data(size_type skip_rows, + size_type num_rows, + std::vector> const& row_group_list); + /** * TODO: Rename this into something more meaningful * From 1bb82543120aa3504b3f0853633302fa6067a49c Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 11 Oct 2022 16:14:28 -0700 Subject: [PATCH 027/162] Use `read_intermediate_data` Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.cu | 151 ++++++----------------------- cpp/src/io/parquet/reader_impl.hpp | 5 +- 2 files changed, 36 insertions(+), 120 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 06be34ccc8d..fbf3a4a215a 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1568,7 +1568,10 @@ reader::impl::impl(std::vector>&& sources, _timestamp_type.id()); } -std::pair, hostdevice_vector> +std::tuple, + 
hostdevice_vector, + std::size_t, + hostdevice_vector> reader::impl::read_intermediate_data(size_type skip_rows, size_type num_rows, const std::vector>& row_group_list) @@ -1580,7 +1583,9 @@ reader::impl::read_intermediate_data(size_type skip_rows, // TODO: fix this if (selected_row_groups.size() != 0 && _input_columns.size() != 0) { return {hostdevice_vector(0, 0, _stream), - hostdevice_vector(0, 0, _stream)}; + hostdevice_vector(0, 0, _stream), + std::size_t{0}, + hostdevice_vector{}}; } // Descriptors for all the chunks that make up the selected columns @@ -1668,6 +1673,7 @@ reader::impl::read_intermediate_data(size_type skip_rows, // Process dataset chunk pages into output columns const auto total_pages = count_page_headers(chunks); hostdevice_vector pages(total_pages, total_pages, _stream); + hostdevice_vector page_nesting_info; if (total_pages > 0) { rmm::device_buffer decomp_page_data; @@ -1681,9 +1687,29 @@ reader::impl::read_intermediate_data(size_type skip_rows, if (chunks[c].codec != parquet::Compression::UNCOMPRESSED) { page_data[c].reset(); } } } + + // build output column info + // walk the schema, building out_buffers that mirror what our final cudf columns will look + // like. important : there is not necessarily a 1:1 mapping between input columns and output + // columns. For example, parquet does not explicitly store a ColumnChunkDesc for struct + // columns. The "structiness" is simply implied by the schema. For example, this schema: + // required group field_id=1 name { + // required binary field_id=2 firstname (String); + // required binary field_id=3 middlename (String); + // required binary field_id=4 lastname (String); + // } + // will only contain 3 columns of data (firstname, middlename, lastname). But of course + // "name" is a struct column that we want to return, so we have to make sure that we + // create it ourselves. 
+ // std::vector output_info = build_output_column_info(); + + // nesting information (sizes, etc) stored -per page- + // note : even for flat schemas, we allocate 1 level of "nesting" info + + allocate_nesting_info(chunks, pages, page_nesting_info); } - return {std::move(chunks), std::move(pages)}; + return {std::move(chunks), std::move(pages), total_pages, std::move(page_nesting_info)}; } table_with_metadata reader::impl::read(size_type skip_rows, @@ -1701,125 +1727,12 @@ table_with_metadata reader::impl::read(size_type skip_rows, std::vector> out_columns; out_columns.reserve(_output_columns.size()); - if (selected_row_groups.size() != 0 && _input_columns.size() != 0) { - // Descriptors for all the chunks that make up the selected columns - const auto num_input_columns = _input_columns.size(); - const auto num_chunks = selected_row_groups.size() * num_input_columns; - hostdevice_vector chunks(0, num_chunks, _stream); - - // Association between each column chunk and its source - std::vector chunk_source_map(num_chunks); - - // Tracker for eventually deallocating compressed and uncompressed data - std::vector> page_data(num_chunks); - - // Keep track of column chunk file offsets - std::vector column_chunk_offsets(num_chunks); - - // Initialize column chunk information - size_t total_decompressed_size = 0; - auto remaining_rows = num_rows; - std::vector> read_rowgroup_tasks; - for (const auto& rg : selected_row_groups) { - const auto& row_group = _metadata->get_row_group(rg.index, rg.source_index); - auto const row_group_start = rg.start_row; - auto const row_group_source = rg.source_index; - auto const row_group_rows = std::min(remaining_rows, row_group.num_rows); - auto const io_chunk_idx = chunks.size(); - - // generate ColumnChunkDesc objects for everything to be decoded (all input columns) - for (size_t i = 0; i < num_input_columns; ++i) { - auto col = _input_columns[i]; - // look up metadata - auto& col_meta = _metadata->get_column_metadata(rg.index, rg.source_index, col.schema_idx); - auto& schema = _metadata->get_schema(col.schema_idx); - - auto [type_width, clock_rate, converted_type] = - conversion_info(to_type_id(schema, _strings_to_categorical, _timestamp_type.id()), - _timestamp_type.id(), - schema.type, - schema.converted_type, - schema.type_length); - - column_chunk_offsets[chunks.size()] = - (col_meta.dictionary_page_offset != 0) - ? 
std::min(col_meta.data_page_offset, col_meta.dictionary_page_offset) - : col_meta.data_page_offset; - - chunks.push_back(gpu::ColumnChunkDesc(col_meta.total_compressed_size, - nullptr, - col_meta.num_values, - schema.type, - type_width, - row_group_start, - row_group_rows, - schema.max_definition_level, - schema.max_repetition_level, - _metadata->get_output_nesting_depth(col.schema_idx), - required_bits(schema.max_definition_level), - required_bits(schema.max_repetition_level), - col_meta.codec, - converted_type, - schema.logical_type, - schema.decimal_scale, - clock_rate, - i, - col.schema_idx)); - - // Map each column chunk to its column index and its source index - chunk_source_map[chunks.size() - 1] = row_group_source; - - if (col_meta.codec != Compression::UNCOMPRESSED) { - total_decompressed_size += col_meta.total_uncompressed_size; - } - } - // Read compressed chunk data to device memory - read_rowgroup_tasks.push_back(read_column_chunks( - page_data, chunks, io_chunk_idx, chunks.size(), column_chunk_offsets, chunk_source_map)); - - remaining_rows -= row_group.num_rows; - } - for (auto& task : read_rowgroup_tasks) { - task.wait(); - } - assert(remaining_rows <= 0); + auto [chunks, pages, total_pages, page_nesting_info] = + read_intermediate_data(skip_rows, num_rows, row_group_list); + if (selected_row_groups.size() != 0 && _input_columns.size() != 0) { // Process dataset chunk pages into output columns - const auto total_pages = count_page_headers(chunks); if (total_pages > 0) { - hostdevice_vector pages(total_pages, total_pages, _stream); - rmm::device_buffer decomp_page_data; - - // decoding of column/page information - decode_page_headers(chunks, pages); - if (total_decompressed_size > 0) { - decomp_page_data = decompress_page_data(chunks, pages); - // Free compressed data - for (size_t c = 0; c < chunks.size(); c++) { - if (chunks[c].codec != parquet::Compression::UNCOMPRESSED) { page_data[c].reset(); } - } - } - - // build output column info - // walk the schema, building out_buffers that mirror what our final cudf columns will look - // like. important : there is not necessarily a 1:1 mapping between input columns and output - // columns. For example, parquet does not explicitly store a ColumnChunkDesc for struct - // columns. The "structiness" is simply implied by the schema. For example, this schema: - // required group field_id=1 name { - // required binary field_id=2 firstname (String); - // required binary field_id=3 middlename (String); - // required binary field_id=4 lastname (String); - // } - // will only contain 3 columns of data (firstname, middlename, lastname). But of course - // "name" is a struct column that we want to return, so we have to make sure that we - // create it ourselves. - // std::vector output_info = build_output_column_info(); - - // nesting information (sizes, etc) stored -per page- - // note : even for flat schemas, we allocate 1 level of "nesting" info - hostdevice_vector page_nesting_info; - allocate_nesting_info(chunks, pages, page_nesting_info); - // - compute column sizes and allocate output buffers. 
// important: // for nested schemas, we have to do some further preprocessing to determine: diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index faf5b8a48d3..04b118b4954 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -89,7 +89,10 @@ class reader::impl { * @brief load_column_chunk_descriotions * @return */ - std::pair, hostdevice_vector> + std::tuple, + hostdevice_vector, + std::size_t, + hostdevice_vector> read_intermediate_data(size_type skip_rows, size_type num_rows, std::vector> const& row_group_list); From 56715ef54b50da0cf0c90c9c1364cbbd303cc1cb Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 12 Oct 2022 11:34:37 -0700 Subject: [PATCH 028/162] Fix bug Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index fbf3a4a215a..2ee29b97212 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1581,7 +1581,7 @@ reader::impl::read_intermediate_data(size_type skip_rows, _metadata->select_row_groups(row_group_list, skip_rows, num_rows); // TODO: fix this - if (selected_row_groups.size() != 0 && _input_columns.size() != 0) { + if (selected_row_groups.size() == 0 || _input_columns.size() == 0) { return {hostdevice_vector(0, 0, _stream), hostdevice_vector(0, 0, _stream), std::size_t{0}, From a7e7e93e7037291d4a0af89b8a8529baf6cdf94f Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 12 Oct 2022 11:45:05 -0700 Subject: [PATCH 029/162] Simplify code Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.cu | 93 +++++++++++++++--------------- cpp/src/io/parquet/reader_impl.hpp | 4 +- 2 files changed, 47 insertions(+), 50 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 2ee29b97212..910caf4ddfc 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1568,9 +1568,9 @@ reader::impl::impl(std::vector>&& sources, _timestamp_type.id()); } -std::tuple, +std::tuple, hostdevice_vector, - std::size_t, hostdevice_vector> reader::impl::read_intermediate_data(size_type skip_rows, size_type num_rows, @@ -1582,9 +1582,9 @@ reader::impl::read_intermediate_data(size_type skip_rows, // TODO: fix this if (selected_row_groups.size() == 0 || _input_columns.size() == 0) { - return {hostdevice_vector(0, 0, _stream), + return {false, + hostdevice_vector(0, 0, _stream), hostdevice_vector(0, 0, _stream), - std::size_t{0}, hostdevice_vector{}}; } @@ -1709,7 +1709,7 @@ reader::impl::read_intermediate_data(size_type skip_rows, allocate_nesting_info(chunks, pages, page_nesting_info); } - return {std::move(chunks), std::move(pages), total_pages, std::move(page_nesting_info)}; + return {total_pages > 0, std::move(chunks), std::move(pages), std::move(page_nesting_info)}; } table_with_metadata reader::impl::read(size_type skip_rows, @@ -1727,51 +1727,48 @@ table_with_metadata reader::impl::read(size_type skip_rows, std::vector> out_columns; out_columns.reserve(_output_columns.size()); - auto [chunks, pages, total_pages, page_nesting_info] = + auto [has_read_data, chunks, pages, page_nesting_info] = read_intermediate_data(skip_rows, num_rows, row_group_list); - if (selected_row_groups.size() != 0 && _input_columns.size() != 0) { - // Process dataset chunk pages into output columns - if (total_pages > 0) { - // - compute column sizes and allocate output buffers. 
- // important: - // for nested schemas, we have to do some further preprocessing to determine: - // - real column output sizes per level of nesting (in a flat schema, there's only 1 level - // of - // nesting and it's size is the row count) - // - // - for nested schemas, output buffer offset values per-page, per nesting-level for the - // purposes of decoding. - // TODO: make this a parameter. - auto const chunked_read_size = 240000; - // auto const chunked_read_size = 0; - auto chunk_reads = preprocess_columns( - chunks, pages, skip_rows, num_rows, uses_custom_row_bounds, chunked_read_size); - - // process each chunk. this is the part that would be externalized into multiple calls - auto read_info = chunk_reads.second[0]; - { - // allocate outgoing columns - allocate_columns(chunks, - pages, - chunk_reads.first, - read_info.skip_rows, - read_info.num_rows, - uses_custom_row_bounds); - - // decoding column data - decode_page_data(chunks, pages, page_nesting_info, read_info.skip_rows, read_info.num_rows); - - // create the final output cudf columns - for (size_t i = 0; i < _output_columns.size(); ++i) { - column_name_info& col_name = out_metadata.schema_info.emplace_back(""); - auto const metadata = - _reader_column_schema.has_value() - ? std::make_optional((*_reader_column_schema)[i]) - : std::nullopt; - out_columns.emplace_back( - make_column(_output_columns[i], &col_name, metadata, _stream, _mr)); - } + if (has_read_data) { + // - compute column sizes and allocate output buffers. + // important: + // for nested schemas, we have to do some further preprocessing to determine: + // - real column output sizes per level of nesting (in a flat schema, there's only 1 level + // of + // nesting and it's size is the row count) + // + // - for nested schemas, output buffer offset values per-page, per nesting-level for the + // purposes of decoding. + // TODO: make this a parameter. + auto const chunked_read_size = 240000; + // auto const chunked_read_size = 0; + auto chunk_reads = preprocess_columns( + chunks, pages, skip_rows, num_rows, uses_custom_row_bounds, chunked_read_size); + + // process each chunk. this is the part that would be externalized into multiple calls + auto read_info = chunk_reads.second[0]; + { + // allocate outgoing columns + allocate_columns(chunks, + pages, + chunk_reads.first, + read_info.skip_rows, + read_info.num_rows, + uses_custom_row_bounds); + + // decoding column data + decode_page_data(chunks, pages, page_nesting_info, read_info.skip_rows, read_info.num_rows); + + // create the final output cudf columns + for (size_t i = 0; i < _output_columns.size(); ++i) { + column_name_info& col_name = out_metadata.schema_info.emplace_back(""); + auto const metadata = + _reader_column_schema.has_value() + ? 
std::make_optional((*_reader_column_schema)[i]) + : std::nullopt; + out_columns.emplace_back( + make_column(_output_columns[i], &col_name, metadata, _stream, _mr)); } } } diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index 04b118b4954..ee243f6269e 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -89,9 +89,9 @@ class reader::impl { * @brief load_column_chunk_descriotions * @return */ - std::tuple, + std::tuple, hostdevice_vector, - std::size_t, hostdevice_vector> read_intermediate_data(size_type skip_rows, size_type num_rows, From 8fe87b1152e856543060dfd6d1205f7ed403b38b Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 12 Oct 2022 14:04:00 -0700 Subject: [PATCH 030/162] Implement `file_intermediate_data` Signed-off-by: Nghia Truong --- cpp/src/io/parquet/parquet_gpu.hpp | 18 +++-- cpp/src/io/parquet/reader_impl.cu | 117 +++++++++++++++-------------- cpp/src/io/parquet/reader_impl.hpp | 13 ++-- 3 files changed, 78 insertions(+), 70 deletions(-) diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index 2680004af3b..f4207354831 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -236,14 +236,18 @@ struct ColumnChunkDesc { int32_t src_col_schema; // my schema index in the file }; -// TODO: rename +// TODO: rename? +struct file_intermediate_data { + hostdevice_vector chunks{}; + hostdevice_vector pages_info{}; + hostdevice_vector page_nesting_info{}; + bool has_data{false}; +}; + +// TODO: rename? struct chunked_intermediate_data { - rmm::device_uvector page_keys; - rmm::device_uvector page_index; - chunked_intermediate_data() - : page_keys(0, rmm::cuda_stream_default), page_index(0, rmm::cuda_stream_default) - { - } + rmm::device_uvector page_keys{0, rmm::cuda_stream_default}; + rmm::device_uvector page_index{0, rmm::cuda_stream_default}; }; struct chunked_read_info { diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 910caf4ddfc..fda59b72c4b 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1568,30 +1568,26 @@ reader::impl::impl(std::vector>&& sources, _timestamp_type.id()); } -std::tuple, - hostdevice_vector, - hostdevice_vector> -reader::impl::read_intermediate_data(size_type skip_rows, - size_type num_rows, - const std::vector>& row_group_list) +gpu::file_intermediate_data reader::impl::preprocess_file( + size_type skip_rows, + size_type num_rows, + const std::vector>& row_group_list) { + gpu::file_intermediate_data output{}; + // Select only row groups required const auto selected_row_groups = _metadata->select_row_groups(row_group_list, skip_rows, num_rows); // TODO: fix this - if (selected_row_groups.size() == 0 || _input_columns.size() == 0) { - return {false, - hostdevice_vector(0, 0, _stream), - hostdevice_vector(0, 0, _stream), - hostdevice_vector{}}; - } + if (selected_row_groups.size() == 0 || _input_columns.size() == 0) { return output; } + + output.has_data = true; // Descriptors for all the chunks that make up the selected columns const auto num_input_columns = _input_columns.size(); const auto num_chunks = selected_row_groups.size() * num_input_columns; - hostdevice_vector chunks(0, num_chunks, _stream); + output.chunks = hostdevice_vector(0, num_chunks, _stream); // Association between each column chunk and its source std::vector chunk_source_map(num_chunks); @@ -1611,7 +1607,7 @@ reader::impl::read_intermediate_data(size_type skip_rows, 
auto const row_group_start = rg.start_row; auto const row_group_source = rg.source_index; auto const row_group_rows = std::min(remaining_rows, row_group.num_rows); - auto const io_chunk_idx = chunks.size(); + auto const io_chunk_idx = output.chunks.size(); // generate ColumnChunkDesc objects for everything to be decoded (all input columns) for (size_t i = 0; i < num_input_columns; ++i) { @@ -1627,41 +1623,46 @@ reader::impl::read_intermediate_data(size_type skip_rows, schema.converted_type, schema.type_length); - column_chunk_offsets[chunks.size()] = + column_chunk_offsets[output.chunks.size()] = (col_meta.dictionary_page_offset != 0) ? std::min(col_meta.data_page_offset, col_meta.dictionary_page_offset) : col_meta.data_page_offset; - chunks.push_back(gpu::ColumnChunkDesc(col_meta.total_compressed_size, - nullptr, - col_meta.num_values, - schema.type, - type_width, - row_group_start, - row_group_rows, - schema.max_definition_level, - schema.max_repetition_level, - _metadata->get_output_nesting_depth(col.schema_idx), - required_bits(schema.max_definition_level), - required_bits(schema.max_repetition_level), - col_meta.codec, - converted_type, - schema.logical_type, - schema.decimal_scale, - clock_rate, - i, - col.schema_idx)); + output.chunks.push_back( + gpu::ColumnChunkDesc(col_meta.total_compressed_size, + nullptr, + col_meta.num_values, + schema.type, + type_width, + row_group_start, + row_group_rows, + schema.max_definition_level, + schema.max_repetition_level, + _metadata->get_output_nesting_depth(col.schema_idx), + required_bits(schema.max_definition_level), + required_bits(schema.max_repetition_level), + col_meta.codec, + converted_type, + schema.logical_type, + schema.decimal_scale, + clock_rate, + i, + col.schema_idx)); // Map each column chunk to its column index and its source index - chunk_source_map[chunks.size() - 1] = row_group_source; + chunk_source_map[output.chunks.size() - 1] = row_group_source; if (col_meta.codec != Compression::UNCOMPRESSED) { total_decompressed_size += col_meta.total_uncompressed_size; } } // Read compressed chunk data to device memory - read_rowgroup_tasks.push_back(read_column_chunks( - page_data, chunks, io_chunk_idx, chunks.size(), column_chunk_offsets, chunk_source_map)); + read_rowgroup_tasks.push_back(read_column_chunks(page_data, + output.chunks, + io_chunk_idx, + output.chunks.size(), + column_chunk_offsets, + chunk_source_map)); remaining_rows -= row_group.num_rows; } @@ -1671,20 +1672,19 @@ reader::impl::read_intermediate_data(size_type skip_rows, assert(remaining_rows <= 0); // Process dataset chunk pages into output columns - const auto total_pages = count_page_headers(chunks); - hostdevice_vector pages(total_pages, total_pages, _stream); - hostdevice_vector page_nesting_info; + const auto total_pages = count_page_headers(output.chunks); + output.pages_info = hostdevice_vector(total_pages, total_pages, _stream); if (total_pages > 0) { rmm::device_buffer decomp_page_data; // decoding of column/page information - decode_page_headers(chunks, pages); + decode_page_headers(output.chunks, output.pages_info); if (total_decompressed_size > 0) { - decomp_page_data = decompress_page_data(chunks, pages); + decomp_page_data = decompress_page_data(output.chunks, output.pages_info); // Free compressed data - for (size_t c = 0; c < chunks.size(); c++) { - if (chunks[c].codec != parquet::Compression::UNCOMPRESSED) { page_data[c].reset(); } + for (size_t c = 0; c < output.chunks.size(); c++) { + if (output.chunks[c].codec != 
parquet::Compression::UNCOMPRESSED) { page_data[c].reset(); } } } @@ -1706,10 +1706,10 @@ reader::impl::read_intermediate_data(size_type skip_rows, // nesting information (sizes, etc) stored -per page- // note : even for flat schemas, we allocate 1 level of "nesting" info - allocate_nesting_info(chunks, pages, page_nesting_info); + allocate_nesting_info(output.chunks, output.pages_info, output.page_nesting_info); } - return {total_pages > 0, std::move(chunks), std::move(pages), std::move(page_nesting_info)}; + return output; } table_with_metadata reader::impl::read(size_type skip_rows, @@ -1727,10 +1727,9 @@ table_with_metadata reader::impl::read(size_type skip_rows, std::vector> out_columns; out_columns.reserve(_output_columns.size()); - auto [has_read_data, chunks, pages, page_nesting_info] = - read_intermediate_data(skip_rows, num_rows, row_group_list); + auto intermediate_data = preprocess_file(skip_rows, num_rows, row_group_list); - if (has_read_data) { + if (intermediate_data.has_data) { // - compute column sizes and allocate output buffers. // important: // for nested schemas, we have to do some further preprocessing to determine: @@ -1743,22 +1742,30 @@ table_with_metadata reader::impl::read(size_type skip_rows, // TODO: make this a parameter. auto const chunked_read_size = 240000; // auto const chunked_read_size = 0; - auto chunk_reads = preprocess_columns( - chunks, pages, skip_rows, num_rows, uses_custom_row_bounds, chunked_read_size); + auto chunk_reads = preprocess_columns(intermediate_data.chunks, + intermediate_data.pages_info, + skip_rows, + num_rows, + uses_custom_row_bounds, + chunked_read_size); // process each chunk. this is the part that would be externalized into multiple calls auto read_info = chunk_reads.second[0]; { // allocate outgoing columns - allocate_columns(chunks, - pages, + allocate_columns(intermediate_data.chunks, + intermediate_data.pages_info, chunk_reads.first, read_info.skip_rows, read_info.num_rows, uses_custom_row_bounds); // decoding column data - decode_page_data(chunks, pages, page_nesting_info, read_info.skip_rows, read_info.num_rows); + decode_page_data(intermediate_data.chunks, + intermediate_data.pages_info, + intermediate_data.page_nesting_info, + read_info.skip_rows, + read_info.num_rows); // create the final output cudf columns for (size_t i = 0; i < _output_columns.size(); ++i) { diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index ee243f6269e..7151a71eedb 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -82,21 +82,18 @@ class reader::impl { bool uses_custom_row_bounds, std::vector> const& row_group_indices); - private: /** * TODO * * @brief load_column_chunk_descriotions * @return */ - std::tuple, - hostdevice_vector, - hostdevice_vector> - read_intermediate_data(size_type skip_rows, - size_type num_rows, - std::vector> const& row_group_list); + gpu::file_intermediate_data preprocess_file( + size_type skip_rows, + size_type num_rows, + std::vector> const& row_group_list); + private: /** * TODO: Rename this into something more meaningful * From 464f4f9a49819fa8cedda864c503ca1c11bc43a2 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 12 Oct 2022 14:09:44 -0700 Subject: [PATCH 031/162] Add `make_output` Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.cu | 44 +++++++++++++++++------------- cpp/src/io/parquet/reader_impl.hpp | 3 ++ 2 files changed, 28 insertions(+), 19 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl.cu 
b/cpp/src/io/parquet/reader_impl.cu
index fda59b72c4b..2326cec8f45 100644
--- a/cpp/src/io/parquet/reader_impl.cu
+++ b/cpp/src/io/parquet/reader_impl.cu
@@ -1712,6 +1712,30 @@ gpu::file_intermediate_data reader::impl::preprocess_file(
   return output;
 }
 
+table_with_metadata reader::impl::make_output(table_metadata& out_metadata,
+                                              std::vector<std::unique_ptr<column>>& out_columns)
+{
+  // Create empty columns as needed (this can happen if we've ended up with no actual data to read)
+  for (size_t i = out_columns.size(); i < _output_columns.size(); ++i) {
+    column_name_info& col_name = out_metadata.schema_info.emplace_back("");
+    out_columns.emplace_back(io::detail::empty_like(_output_columns[i], &col_name, _stream, _mr));
+  }
+
+  // Return column names (must match order of returned columns)
+  out_metadata.column_names.resize(_output_columns.size());
+  for (size_t i = 0; i < _output_column_schemas.size(); i++) {
+    auto const& schema = _metadata->get_schema(_output_column_schemas[i]);
+    out_metadata.column_names[i] = schema.name;
+  }
+
+  // Return user metadata
+  out_metadata.per_file_user_data = _metadata->get_key_value_metadata();
+  out_metadata.user_data = {out_metadata.per_file_user_data[0].begin(),
+                            out_metadata.per_file_user_data[0].end()};
+
+  return {std::make_unique<table>
(std::move(out_columns)), std::move(out_metadata)};
+}
+
 table_with_metadata reader::impl::read(size_type skip_rows,
                                        size_type num_rows,
                                        bool uses_custom_row_bounds,
@@ -1780,25 +1804,7 @@ table_with_metadata reader::impl::read(size_type skip_rows,
     }
   }
 
-  // Create empty columns as needed (this can happen if we've ended up with no actual data to read)
-  for (size_t i = out_columns.size(); i < _output_columns.size(); ++i) {
-    column_name_info& col_name = out_metadata.schema_info.emplace_back("");
-    out_columns.emplace_back(io::detail::empty_like(_output_columns[i], &col_name, _stream, _mr));
-  }
-
-  // Return column names (must match order of returned columns)
-  out_metadata.column_names.resize(_output_columns.size());
-  for (size_t i = 0; i < _output_column_schemas.size(); i++) {
-    auto const& schema = _metadata->get_schema(_output_column_schemas[i]);
-    out_metadata.column_names[i] = schema.name;
-  }
-
-  // Return user metadata
-  out_metadata.per_file_user_data = _metadata->get_key_value_metadata();
-  out_metadata.user_data = {out_metadata.per_file_user_data[0].begin(),
-                            out_metadata.per_file_user_data[0].end()};
-
-  return {std::make_unique<table>
(std::move(out_columns)), std::move(out_metadata)}; + return make_output(out_metadata, out_columns); } // Forward to implementation diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index 7151a71eedb..60cbd9197d3 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -93,6 +93,9 @@ class reader::impl { size_type num_rows, std::vector> const& row_group_list); + table_with_metadata make_output(table_metadata& out_metadata, + std::vector>& out_columns); + private: /** * TODO: Rename this into something more meaningful From 56756d6ba7afeba6087782a2a2e2ae26a8748209 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 12 Oct 2022 14:24:16 -0700 Subject: [PATCH 032/162] Implement `read_chunk` Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.cu | 125 +++++++++++++++-------------- cpp/src/io/parquet/reader_impl.hpp | 24 +++++- 2 files changed, 87 insertions(+), 62 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 2326cec8f45..df4b18607cd 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1712,8 +1712,46 @@ gpu::file_intermediate_data reader::impl::preprocess_file( return output; } -table_with_metadata reader::impl::make_output(table_metadata& out_metadata, - std::vector>& out_columns) +table_with_metadata reader::impl::read_chunk(gpu::file_intermediate_data& file_data, + gpu::chunked_intermediate_data& chunk_data, + gpu::chunked_read_info const& read_info, + bool uses_custom_row_bounds) +{ + table_metadata out_metadata; + + // output cudf columns as determined by the top level schema + std::vector> out_columns; + out_columns.reserve(_output_columns.size()); + + // allocate outgoing columns + allocate_columns(file_data.chunks, + file_data.pages_info, + chunk_data, + read_info.skip_rows, + read_info.num_rows, + uses_custom_row_bounds); + + // decoding column data + decode_page_data(file_data.chunks, + file_data.pages_info, + file_data.page_nesting_info, + read_info.skip_rows, + read_info.num_rows); + + // create the final output cudf columns + for (size_t i = 0; i < _output_columns.size(); ++i) { + column_name_info& col_name = out_metadata.schema_info.emplace_back(""); + auto const metadata = _reader_column_schema.has_value() + ? std::make_optional((*_reader_column_schema)[i]) + : std::nullopt; + out_columns.emplace_back(make_column(_output_columns[i], &col_name, metadata, _stream, _mr)); + } + + return finalize_output(out_metadata, out_columns); +} + +table_with_metadata reader::impl::finalize_output(table_metadata& out_metadata, + std::vector>& out_columns) { // Create empty columns as needed (this can happen if we've ended up with no actual data to read) for (size_t i = out_columns.size(); i < _output_columns.size(); ++i) { @@ -1745,66 +1783,33 @@ table_with_metadata reader::impl::read(size_type skip_rows, const auto selected_row_groups = _metadata->select_row_groups(row_group_list, skip_rows, num_rows); - table_metadata out_metadata; - - // output cudf columns as determined by the top level schema - std::vector> out_columns; - out_columns.reserve(_output_columns.size()); - - auto intermediate_data = preprocess_file(skip_rows, num_rows, row_group_list); + auto file_data = preprocess_file(skip_rows, num_rows, row_group_list); - if (intermediate_data.has_data) { - // - compute column sizes and allocate output buffers. 
- // important: - // for nested schemas, we have to do some further preprocessing to determine: - // - real column output sizes per level of nesting (in a flat schema, there's only 1 level - // of - // nesting and it's size is the row count) - // - // - for nested schemas, output buffer offset values per-page, per nesting-level for the - // purposes of decoding. - // TODO: make this a parameter. - auto const chunked_read_size = 240000; - // auto const chunked_read_size = 0; - auto chunk_reads = preprocess_columns(intermediate_data.chunks, - intermediate_data.pages_info, - skip_rows, - num_rows, - uses_custom_row_bounds, - chunked_read_size); - - // process each chunk. this is the part that would be externalized into multiple calls - auto read_info = chunk_reads.second[0]; - { - // allocate outgoing columns - allocate_columns(intermediate_data.chunks, - intermediate_data.pages_info, - chunk_reads.first, - read_info.skip_rows, - read_info.num_rows, - uses_custom_row_bounds); - - // decoding column data - decode_page_data(intermediate_data.chunks, - intermediate_data.pages_info, - intermediate_data.page_nesting_info, - read_info.skip_rows, - read_info.num_rows); - - // create the final output cudf columns - for (size_t i = 0; i < _output_columns.size(); ++i) { - column_name_info& col_name = out_metadata.schema_info.emplace_back(""); - auto const metadata = - _reader_column_schema.has_value() - ? std::make_optional((*_reader_column_schema)[i]) - : std::nullopt; - out_columns.emplace_back( - make_column(_output_columns[i], &col_name, metadata, _stream, _mr)); - } - } - } + // todo: fix this (should be empty instead of null) + if (!file_data.has_data) { return table_with_metadata{}; } - return make_output(out_metadata, out_columns); + // - compute column sizes and allocate output buffers. + // important: + // for nested schemas, we have to do some further preprocessing to determine: + // - real column output sizes per level of nesting (in a flat schema, there's only 1 level + // of + // nesting and it's size is the row count) + // + // - for nested schemas, output buffer offset values per-page, per nesting-level for the + // purposes of decoding. + // TODO: make this a parameter. + auto const chunked_read_size = 240000; + // auto const chunked_read_size = 0; + auto chunk_reads = preprocess_columns(file_data.chunks, + file_data.pages_info, + skip_rows, + num_rows, + uses_custom_row_bounds, + chunked_read_size); + + // process each chunk. 
this is the part that would be externalized into multiple calls + auto read_info = chunk_reads.second[0]; + return read_chunk(file_data, chunk_reads.first, read_info, uses_custom_row_bounds); } // Forward to implementation diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index 60cbd9197d3..c76dcbaa558 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -93,10 +93,30 @@ class reader::impl { size_type num_rows, std::vector> const& row_group_list); - table_with_metadata make_output(table_metadata& out_metadata, - std::vector>& out_columns); + /** + * TODO + * + * @brief read_chunk + * @param chunk_info + * @return + */ + table_with_metadata read_chunk(gpu::file_intermediate_data& file_data, + gpu::chunked_intermediate_data& chunk_data, + gpu::chunked_read_info const& read_info, + bool uses_custom_row_bounds); private: + /** + * TODO + * + * @brief make_output + * @param out_metadata + * @param out_columns + * @return + */ + table_with_metadata finalize_output(table_metadata& out_metadata, + std::vector>& out_columns); + /** * TODO: Rename this into something more meaningful * From 3044ac503ae4a1f2a516d01cec01ab4504437c12 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 12 Oct 2022 14:28:42 -0700 Subject: [PATCH 033/162] Cleanup Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.cu | 4 ---- 1 file changed, 4 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index df4b18607cd..747e2eea118 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1779,10 +1779,6 @@ table_with_metadata reader::impl::read(size_type skip_rows, bool uses_custom_row_bounds, std::vector> const& row_group_list) { - // Select only row groups required - const auto selected_row_groups = - _metadata->select_row_groups(row_group_list, skip_rows, num_rows); - auto file_data = preprocess_file(skip_rows, num_rows, row_group_list); // todo: fix this (should be empty instead of null) From ffb8a1901d0438646d6f484bb6bf9b22f7d564f1 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 12 Oct 2022 15:33:04 -0700 Subject: [PATCH 034/162] Fix bug when `skip_rows` and `num_rows` are modified inside a called function Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.cu | 50 +++++++++++++++++------------- cpp/src/io/parquet/reader_impl.hpp | 2 +- 2 files changed, 29 insertions(+), 23 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 747e2eea118..35b5c5f442f 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1568,26 +1568,29 @@ reader::impl::impl(std::vector>&& sources, _timestamp_type.id()); } -gpu::file_intermediate_data reader::impl::preprocess_file( +std::tuple reader::impl::preprocess_file( size_type skip_rows, size_type num_rows, const std::vector>& row_group_list) { - gpu::file_intermediate_data output{}; + gpu::file_intermediate_data file_data{}; // Select only row groups required + // Note: `skip_rows` and `num_rows` will be modified in this function. 
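  // (annotation, not from the patch itself: as the note above says, select_row_groups() modifies
  //  skip_rows and num_rows in place, clamping them to the rows actually covered by the selected
  //  row groups (so a num_rows of -1, meaning "read everything", presumably comes back as the
  //  real row count). That is why this patch has preprocess_file() hand the corrected values back
  //  to the caller; see the skip_rows_corrected / num_rows_corrected structured bindings in the
  //  read() hunk further down in this same patch.)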
const auto selected_row_groups = _metadata->select_row_groups(row_group_list, skip_rows, num_rows); // TODO: fix this - if (selected_row_groups.size() == 0 || _input_columns.size() == 0) { return output; } + if (selected_row_groups.size() == 0 || _input_columns.size() == 0) { + return {std::move(file_data), skip_rows, num_rows}; + } - output.has_data = true; + file_data.has_data = true; // Descriptors for all the chunks that make up the selected columns const auto num_input_columns = _input_columns.size(); const auto num_chunks = selected_row_groups.size() * num_input_columns; - output.chunks = hostdevice_vector(0, num_chunks, _stream); + file_data.chunks = hostdevice_vector(0, num_chunks, _stream); // Association between each column chunk and its source std::vector chunk_source_map(num_chunks); @@ -1607,7 +1610,7 @@ gpu::file_intermediate_data reader::impl::preprocess_file( auto const row_group_start = rg.start_row; auto const row_group_source = rg.source_index; auto const row_group_rows = std::min(remaining_rows, row_group.num_rows); - auto const io_chunk_idx = output.chunks.size(); + auto const io_chunk_idx = file_data.chunks.size(); // generate ColumnChunkDesc objects for everything to be decoded (all input columns) for (size_t i = 0; i < num_input_columns; ++i) { @@ -1623,12 +1626,12 @@ gpu::file_intermediate_data reader::impl::preprocess_file( schema.converted_type, schema.type_length); - column_chunk_offsets[output.chunks.size()] = + column_chunk_offsets[file_data.chunks.size()] = (col_meta.dictionary_page_offset != 0) ? std::min(col_meta.data_page_offset, col_meta.dictionary_page_offset) : col_meta.data_page_offset; - output.chunks.push_back( + file_data.chunks.push_back( gpu::ColumnChunkDesc(col_meta.total_compressed_size, nullptr, col_meta.num_values, @@ -1650,7 +1653,7 @@ gpu::file_intermediate_data reader::impl::preprocess_file( col.schema_idx)); // Map each column chunk to its column index and its source index - chunk_source_map[output.chunks.size() - 1] = row_group_source; + chunk_source_map[file_data.chunks.size() - 1] = row_group_source; if (col_meta.codec != Compression::UNCOMPRESSED) { total_decompressed_size += col_meta.total_uncompressed_size; @@ -1658,9 +1661,9 @@ gpu::file_intermediate_data reader::impl::preprocess_file( } // Read compressed chunk data to device memory read_rowgroup_tasks.push_back(read_column_chunks(page_data, - output.chunks, + file_data.chunks, io_chunk_idx, - output.chunks.size(), + file_data.chunks.size(), column_chunk_offsets, chunk_source_map)); @@ -1672,19 +1675,21 @@ gpu::file_intermediate_data reader::impl::preprocess_file( assert(remaining_rows <= 0); // Process dataset chunk pages into output columns - const auto total_pages = count_page_headers(output.chunks); - output.pages_info = hostdevice_vector(total_pages, total_pages, _stream); + const auto total_pages = count_page_headers(file_data.chunks); + file_data.pages_info = hostdevice_vector(total_pages, total_pages, _stream); if (total_pages > 0) { rmm::device_buffer decomp_page_data; // decoding of column/page information - decode_page_headers(output.chunks, output.pages_info); + decode_page_headers(file_data.chunks, file_data.pages_info); if (total_decompressed_size > 0) { - decomp_page_data = decompress_page_data(output.chunks, output.pages_info); + decomp_page_data = decompress_page_data(file_data.chunks, file_data.pages_info); // Free compressed data - for (size_t c = 0; c < output.chunks.size(); c++) { - if (output.chunks[c].codec != parquet::Compression::UNCOMPRESSED) { 
page_data[c].reset(); } + for (size_t c = 0; c < file_data.chunks.size(); c++) { + if (file_data.chunks[c].codec != parquet::Compression::UNCOMPRESSED) { + page_data[c].reset(); + } } } @@ -1706,10 +1711,10 @@ gpu::file_intermediate_data reader::impl::preprocess_file( // nesting information (sizes, etc) stored -per page- // note : even for flat schemas, we allocate 1 level of "nesting" info - allocate_nesting_info(output.chunks, output.pages_info, output.page_nesting_info); + allocate_nesting_info(file_data.chunks, file_data.pages_info, file_data.page_nesting_info); } - return output; + return {std::move(file_data), skip_rows, num_rows}; } table_with_metadata reader::impl::read_chunk(gpu::file_intermediate_data& file_data, @@ -1779,7 +1784,8 @@ table_with_metadata reader::impl::read(size_type skip_rows, bool uses_custom_row_bounds, std::vector> const& row_group_list) { - auto file_data = preprocess_file(skip_rows, num_rows, row_group_list); + auto [file_data, skip_rows_corrected, num_rows_corrected] = + preprocess_file(skip_rows, num_rows, row_group_list); // todo: fix this (should be empty instead of null) if (!file_data.has_data) { return table_with_metadata{}; } @@ -1798,8 +1804,8 @@ table_with_metadata reader::impl::read(size_type skip_rows, // auto const chunked_read_size = 0; auto chunk_reads = preprocess_columns(file_data.chunks, file_data.pages_info, - skip_rows, - num_rows, + skip_rows_corrected, + num_rows_corrected, uses_custom_row_bounds, chunked_read_size); diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index c76dcbaa558..67677519ed0 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -88,7 +88,7 @@ class reader::impl { * @brief load_column_chunk_descriotions * @return */ - gpu::file_intermediate_data preprocess_file( + std::tuple preprocess_file( size_type skip_rows, size_type num_rows, std::vector> const& row_group_list); From baf3603f6600eafe13b92cb657ee38b2fbd1c46f Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 13 Oct 2022 09:59:41 -0700 Subject: [PATCH 035/162] Fix comment Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 35b5c5f442f..a2868cdcc8b 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1787,7 +1787,7 @@ table_with_metadata reader::impl::read(size_type skip_rows, auto [file_data, skip_rows_corrected, num_rows_corrected] = preprocess_file(skip_rows, num_rows, row_group_list); - // todo: fix this (should be empty instead of null) + // todo: fix this (empty output may be incorrect) if (!file_data.has_data) { return table_with_metadata{}; } // - compute column sizes and allocate output buffers. 
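A note on the `todo` comment reworded above: when there is nothing to read, `read()` currently
returns a default-constructed `table_with_metadata`, i.e. a null table, rather than a zero-row
table that still carries the expected schema. Below is a minimal sketch of one way that
early-return path could instead go through `finalize_output()`, which already creates empty
columns for anything never materialized; this only reuses names from the surrounding diff for
illustration and is not a change made in this series:

    if (!file_data.has_data) {
      table_metadata out_metadata;
      std::vector<std::unique_ptr<column>> out_columns;
      // finalize_output() appends an empty column for every entry of _output_columns that
      // out_columns does not yet contain, so the schema is preserved even with no data.
      return finalize_output(out_metadata, out_columns);
    }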
From 8bdab449af520e0a4ef52e5ff62219f8189686e2 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 13 Oct 2022 13:04:06 -0700 Subject: [PATCH 036/162] Store preprocess data Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.cu | 16 ++++++++-------- cpp/src/io/parquet/reader_impl.hpp | 18 +++++++++++------- cpp/src/io/parquet/reader_preprocess.cu | 18 ++++++++++-------- 3 files changed, 29 insertions(+), 23 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index a2868cdcc8b..f5cd9b963af 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1802,16 +1802,16 @@ table_with_metadata reader::impl::read(size_type skip_rows, // TODO: make this a parameter. auto const chunked_read_size = 240000; // auto const chunked_read_size = 0; - auto chunk_reads = preprocess_columns(file_data.chunks, - file_data.pages_info, - skip_rows_corrected, - num_rows_corrected, - uses_custom_row_bounds, - chunked_read_size); + preprocess_columns(file_data.chunks, + file_data.pages_info, + skip_rows_corrected, + num_rows_corrected, + uses_custom_row_bounds, + chunked_read_size); // process each chunk. this is the part that would be externalized into multiple calls - auto read_info = chunk_reads.second[0]; - return read_chunk(file_data, chunk_reads.first, read_info, uses_custom_row_bounds); + auto read_info = chunked_read_info[0]; + return read_chunk(file_data, chunked_itm_data, read_info, uses_custom_row_bounds); } // Forward to implementation diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index 67677519ed0..7cc71169e20 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -203,13 +203,12 @@ class reader::impl { * bounds * a preprocess. */ - std::pair> preprocess_columns( - hostdevice_vector& chunks, - hostdevice_vector& pages, - size_t min_row, - size_t total_rows, - bool uses_custom_row_bounds, - size_type chunked_read_size); + void preprocess_columns(hostdevice_vector& chunks, + hostdevice_vector& pages, + size_t min_row, + size_t total_rows, + bool uses_custom_row_bounds, + size_type chunked_read_size); /** * TODO @@ -260,6 +259,11 @@ class reader::impl { bool _strings_to_categorical = false; std::optional> _reader_column_schema; data_type _timestamp_type{type_id::EMPTY}; + + // Variables used for chunked reading: + cudf::io::parquet::gpu::chunked_intermediate_data chunked_itm_data; + std::vector chunked_read_info; + bool columns_preprocessed{false}; }; } // namespace parquet diff --git a/cpp/src/io/parquet/reader_preprocess.cu b/cpp/src/io/parquet/reader_preprocess.cu index 111ca04502e..8bf69b43ea5 100644 --- a/cpp/src/io/parquet/reader_preprocess.cu +++ b/cpp/src/io/parquet/reader_preprocess.cu @@ -366,14 +366,15 @@ struct start_offset_output_iterator { /** * @copydoc cudf::io::detail::parquet::preprocess_columns */ -std::pair> -reader::impl::preprocess_columns(hostdevice_vector& chunks, - hostdevice_vector& pages, - size_t min_row, - size_t num_rows, - bool uses_custom_row_bounds, - size_type chunked_read_size) +void reader::impl::preprocess_columns(hostdevice_vector& chunks, + hostdevice_vector& pages, + size_t min_row, + size_t num_rows, + bool uses_custom_row_bounds, + size_type chunked_read_size) { + if (columns_preprocessed) { return; } + // iterate over all input columns and determine if they contain lists so we can further // preprocess them. 
bool has_lists = false; @@ -481,7 +482,8 @@ reader::impl::preprocess_columns(hostdevice_vector& chunks chunked_read_size > 0 ? compute_splits(pages, id, num_rows, chunked_read_size, _stream) : std::vector{{min_row, num_rows}}; - return {std::move(id), std::move(read_chunks)}; + chunked_itm_data = std::move(id); + chunked_read_info = std::move(read_chunks); } void reader::impl::allocate_columns(hostdevice_vector& chunks, From ec4abfb674dcb718b1f18c46f78b1cc87ab1eed3 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 13 Oct 2022 14:21:32 -0700 Subject: [PATCH 037/162] Implement `chunked_reader` detail class Signed-off-by: Nghia Truong --- cpp/include/cudf/io/detail/parquet.hpp | 51 ++++++++++++++++++++++++-- cpp/include/cudf/io/parquet.hpp | 46 +++++++---------------- cpp/src/io/functions.cpp | 43 +++++++--------------- cpp/src/io/parquet/reader_impl.cu | 31 +++++++++++++--- cpp/src/io/parquet/reader_impl.hpp | 33 +++++++++++------ 5 files changed, 123 insertions(+), 81 deletions(-) diff --git a/cpp/include/cudf/io/detail/parquet.hpp b/cpp/include/cudf/io/detail/parquet.hpp index 7675dc70cb2..3bba9ddc841 100644 --- a/cpp/include/cudf/io/detail/parquet.hpp +++ b/cpp/include/cudf/io/detail/parquet.hpp @@ -36,6 +36,7 @@ namespace io { // Forward declaration class parquet_reader_options; class parquet_writer_options; +class chunked_parquet_reader_options; class chunked_parquet_writer_options; namespace detail { @@ -45,7 +46,7 @@ namespace parquet { * @brief Class to read Parquet dataset data into columns. */ class reader { - private: + protected: class impl; std::unique_ptr _impl; @@ -66,7 +67,7 @@ class reader { /** * @brief Destructor explicitly-declared to avoid inlined in header */ - ~reader(); + virtual ~reader(); /** * @brief Reads the dataset as per given options. @@ -75,7 +76,51 @@ class reader { * * @return The set of columns along with table metadata */ - table_with_metadata read(parquet_reader_options const& options); + virtual table_with_metadata read(parquet_reader_options const& options); +}; + +/** + * TODO + * + * @brief The chunked_reader class + */ +class chunked_reader : reader { + public: + /** + * TODO + * + * @brief Constructor from an array of datasources + * + * @param sources Input `datasource` objects to read the dataset from + * @param options Settings for controlling reading behavior + * @param stream CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource to use for device memory allocation + */ + explicit chunked_reader(std::vector>&& sources, + chunked_parquet_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + + /** + * @brief Destructor explicitly-declared to avoid inlined in header + */ + ~chunked_reader(); + + /** + * TODO + * + * @brief has_next + * @return + */ + bool has_next(); + + /** + * TODO + * + * @brief read_chunk + * @return + */ + table_with_metadata read_chunk(); }; /** diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index cf836edf4a8..8a17f3344f0 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -526,23 +526,9 @@ class chunked_parquet_reader { rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Destructor, calling `close()` for the reading file to release resources. + * @brief Destructor, destroying the internal reader instance. 
*/ - ~chunked_parquet_reader() { close(); } - - /** - * @brief Read a chunk of Parquet dataset into a set of columns. - * - * The sequence of returned tables, if concatenated by their order, guarantee to form a complete - * dataset as reading the entire given file at once. - * - * An empty table will be returned if all the data in the given file has been read and returned by - * the previous calls, or the `close()` function has been called. - * - * @return The set of columns along with metadata - */ - - table_with_metadata read_chunk(); + ~chunked_parquet_reader(); /** * @brief Check if there is any data of the given file has not yet processed. @@ -552,19 +538,20 @@ class chunked_parquet_reader { * * @return A boolean value indicating if there is any data left to process */ - bool has_next() - { - // TODO: - // if(reader->is_close()) { return false; } - return skip_rows >= total_rows; - } + bool has_next(); /** - * @brief Close the reading file to release internal resources. + * @brief Read a chunk of Parquet dataset into a set of columns. + * + * The sequence of returned tables, if concatenated by their order, guarantee to form a complete + * dataset as reading the entire given file at once. * - * This should not have any effect if being called on an already closed file. + * An empty table will be returned if all the data in the given file has been read and returned by + * the previous calls, or the `close()` function has been called. + * + * @return The set of columns along with metadata */ - void close(); + table_with_metadata read_chunk(); private: /** @@ -575,14 +562,9 @@ class chunked_parquet_reader { * - Decompressing and processing pages. * - Any other necessary preprocessing steps. */ - void preprocess(); - - // The internal instance of the reader class to perform chunked reading. - // TODO: Replace this class with a reader class that has interface supporting chunked reading - std::unique_ptr reader; + // void preprocess(); - size_type skip_rows{0}; - size_type total_rows{0}; + std::unique_ptr reader; }; /** @} */ // end of group diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp index 5925b822414..58348220b8b 100644 --- a/cpp/src/io/functions.cpp +++ b/cpp/src/io/functions.cpp @@ -470,14 +470,21 @@ std::unique_ptr> write_parquet(parquet_writer_options const */ chunked_parquet_reader::chunked_parquet_reader(chunked_parquet_reader_options const& options, rmm::mr::device_memory_resource* mr) - : reader{ - std::make_unique(make_datasources(options.get_source()), - dynamic_cast(options), - cudf::default_stream_value, - mr)} + : reader{std::make_unique( + make_datasources(options.get_source()), options, cudf::default_stream_value, mr)} { } +/** + * @copydoc cudf::io::chunked_parquet_reader::~chunked_parquet_reader + */ +chunked_parquet_reader::~chunked_parquet_reader() = default; + +/** + * @copydoc cudf::io::chunked_parquet_reader::has_next + */ +bool chunked_parquet_reader::has_next() { return reader->has_next(); } + /** * @copydoc cudf::io::chunked_parquet_reader::read_chunk */ @@ -486,35 +493,11 @@ table_with_metadata chunked_parquet_reader::read_chunk() // On the first call, a preprocessing step is called which may be expensive before a table is // returned. All subsequent calls are essentially just doing incremental column allocation and row // decoding (using all the data stored from the preprocessing step). 
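  // (illustrative usage of the public API this patch introduces, mirroring the gtest added later
  //  in this series; `opts` stands for a chunked_parquet_reader_options built from a source_info:)
  //
  //    cudf::io::chunked_parquet_reader reader(opts);
  //    while (reader.has_next()) {
  //      auto chunk = reader.read_chunk();  // each table is the next contiguous slice of rows
  //      // ... use chunk.tbl ...
  //    }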
- preprocess(); // In each call to this function, the internal `skip_rows` state is updated such that the next // call will skip the rows returned by the previous call, making sure that the sequence of // returned tables are continuous and form a complete dataset as reading the entire file at once. - auto output = reader->read(parquet_reader_options{}); - skip_rows += output.tbl->num_rows(); - - return output; -} - -/** - * @copydoc cudf::io::chunked_parquet_reader::preprocess - */ -void chunked_parquet_reader::preprocess() -{ - // TODO - // This step should be a no-op after if it is called from the second time. - // reader->preprocess(); -} - -/** - * @copydoc cudf::io::chunked_parquet_reader::close - */ -void chunked_parquet_reader::close() -{ - // TODO - // This step should be a no-op if it was called before. - // reader->close(); + return reader->read_chunk(); } /** diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index f5cd9b963af..3be0cbfc1ca 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1717,10 +1717,10 @@ std::tuple reader::impl::prep return {std::move(file_data), skip_rows, num_rows}; } -table_with_metadata reader::impl::read_chunk(gpu::file_intermediate_data& file_data, - gpu::chunked_intermediate_data& chunk_data, - gpu::chunked_read_info const& read_info, - bool uses_custom_row_bounds) +table_with_metadata reader::impl::read_chunk_internal(gpu::file_intermediate_data& file_data, + gpu::chunked_intermediate_data& chunk_data, + gpu::chunked_read_info const& read_info, + bool uses_custom_row_bounds) { table_metadata out_metadata; @@ -1811,9 +1811,11 @@ table_with_metadata reader::impl::read(size_type skip_rows, // process each chunk. this is the part that would be externalized into multiple calls auto read_info = chunked_read_info[0]; - return read_chunk(file_data, chunked_itm_data, read_info, uses_custom_row_bounds); + return read_chunk_internal(file_data, chunked_itm_data, read_info, uses_custom_row_bounds); } +table_with_metadata reader::impl::read_chunk() { return table_with_metadata{}; } + // Forward to implementation reader::reader(std::vector>&& sources, parquet_reader_options const& options, @@ -1837,6 +1839,25 @@ table_with_metadata reader::read(parquet_reader_options const& options) options.get_row_groups()); } +// Forward to implementation +chunked_reader::chunked_reader(std::vector>&& sources, + chunked_parquet_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + : reader(std::forward>>(sources), + dynamic_cast(options), + stream, + mr) +{ +} + +// Destructor within this translation unit +chunked_reader::~chunked_reader() = default; + +bool chunked_reader::has_next() { return _impl->has_next(); } + +table_with_metadata chunked_reader::read_chunk() { return _impl->read_chunk(); } + } // namespace parquet } // namespace detail } // namespace io diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index 7cc71169e20..d134b89252c 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -85,27 +85,37 @@ class reader::impl { /** * TODO * - * @brief load_column_chunk_descriotions + * @brief read_chunk + * @param chunk_info * @return */ - std::tuple preprocess_file( - size_type skip_rows, - size_type num_rows, - std::vector> const& row_group_list); + table_with_metadata read_chunk(); /** * TODO * - * @brief read_chunk - * @param chunk_info + * @brief read_completed * @return */ - 
table_with_metadata read_chunk(gpu::file_intermediate_data& file_data, - gpu::chunked_intermediate_data& chunk_data, - gpu::chunked_read_info const& read_info, - bool uses_custom_row_bounds); + bool has_next() { return current_read_chunk >= chunked_read_info.size(); } private: + table_with_metadata read_chunk_internal(gpu::file_intermediate_data& file_data, + gpu::chunked_intermediate_data& chunk_data, + gpu::chunked_read_info const& read_info, + bool uses_custom_row_bounds); + + /** + * TODO + * + * @brief load_column_chunk_descriotions + * @return + */ + std::tuple preprocess_file( + size_type skip_rows, + size_type num_rows, + std::vector> const& row_group_list); + /** * TODO * @@ -263,6 +273,7 @@ class reader::impl { // Variables used for chunked reading: cudf::io::parquet::gpu::chunked_intermediate_data chunked_itm_data; std::vector chunked_read_info; + std::size_t current_read_chunk{0}; bool columns_preprocessed{false}; }; From cb1dea44645ac2123523cbf09317892d93563b60 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 13 Oct 2022 14:57:02 -0700 Subject: [PATCH 038/162] Refactoring Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.cu | 71 ++++++++++++------------- cpp/src/io/parquet/reader_impl.hpp | 16 +++--- cpp/src/io/parquet/reader_preprocess.cu | 6 +-- 3 files changed, 42 insertions(+), 51 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 3be0cbfc1ca..84a98b7fb0d 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1568,13 +1568,11 @@ reader::impl::impl(std::vector>&& sources, _timestamp_type.id()); } -std::tuple reader::impl::preprocess_file( +std::pair reader::impl::preprocess_file( size_type skip_rows, size_type num_rows, const std::vector>& row_group_list) { - gpu::file_intermediate_data file_data{}; - // Select only row groups required // Note: `skip_rows` and `num_rows` will be modified in this function. const auto selected_row_groups = @@ -1582,15 +1580,15 @@ std::tuple reader::impl::prep // TODO: fix this if (selected_row_groups.size() == 0 || _input_columns.size() == 0) { - return {std::move(file_data), skip_rows, num_rows}; + return {skip_rows, num_rows}; } - file_data.has_data = true; + file_itm_data.has_data = true; // Descriptors for all the chunks that make up the selected columns const auto num_input_columns = _input_columns.size(); const auto num_chunks = selected_row_groups.size() * num_input_columns; - file_data.chunks = hostdevice_vector(0, num_chunks, _stream); + file_itm_data.chunks = hostdevice_vector(0, num_chunks, _stream); // Association between each column chunk and its source std::vector chunk_source_map(num_chunks); @@ -1610,7 +1608,7 @@ std::tuple reader::impl::prep auto const row_group_start = rg.start_row; auto const row_group_source = rg.source_index; auto const row_group_rows = std::min(remaining_rows, row_group.num_rows); - auto const io_chunk_idx = file_data.chunks.size(); + auto const io_chunk_idx = file_itm_data.chunks.size(); // generate ColumnChunkDesc objects for everything to be decoded (all input columns) for (size_t i = 0; i < num_input_columns; ++i) { @@ -1626,12 +1624,12 @@ std::tuple reader::impl::prep schema.converted_type, schema.type_length); - column_chunk_offsets[file_data.chunks.size()] = + column_chunk_offsets[file_itm_data.chunks.size()] = (col_meta.dictionary_page_offset != 0) ? 
std::min(col_meta.data_page_offset, col_meta.dictionary_page_offset) : col_meta.data_page_offset; - file_data.chunks.push_back( + file_itm_data.chunks.push_back( gpu::ColumnChunkDesc(col_meta.total_compressed_size, nullptr, col_meta.num_values, @@ -1653,7 +1651,7 @@ std::tuple reader::impl::prep col.schema_idx)); // Map each column chunk to its column index and its source index - chunk_source_map[file_data.chunks.size() - 1] = row_group_source; + chunk_source_map[file_itm_data.chunks.size() - 1] = row_group_source; if (col_meta.codec != Compression::UNCOMPRESSED) { total_decompressed_size += col_meta.total_uncompressed_size; @@ -1661,9 +1659,9 @@ std::tuple reader::impl::prep } // Read compressed chunk data to device memory read_rowgroup_tasks.push_back(read_column_chunks(page_data, - file_data.chunks, + file_itm_data.chunks, io_chunk_idx, - file_data.chunks.size(), + file_itm_data.chunks.size(), column_chunk_offsets, chunk_source_map)); @@ -1675,19 +1673,19 @@ std::tuple reader::impl::prep assert(remaining_rows <= 0); // Process dataset chunk pages into output columns - const auto total_pages = count_page_headers(file_data.chunks); - file_data.pages_info = hostdevice_vector(total_pages, total_pages, _stream); + const auto total_pages = count_page_headers(file_itm_data.chunks); + file_itm_data.pages_info = hostdevice_vector(total_pages, total_pages, _stream); if (total_pages > 0) { rmm::device_buffer decomp_page_data; // decoding of column/page information - decode_page_headers(file_data.chunks, file_data.pages_info); + decode_page_headers(file_itm_data.chunks, file_itm_data.pages_info); if (total_decompressed_size > 0) { - decomp_page_data = decompress_page_data(file_data.chunks, file_data.pages_info); + decomp_page_data = decompress_page_data(file_itm_data.chunks, file_itm_data.pages_info); // Free compressed data - for (size_t c = 0; c < file_data.chunks.size(); c++) { - if (file_data.chunks[c].codec != parquet::Compression::UNCOMPRESSED) { + for (size_t c = 0; c < file_itm_data.chunks.size(); c++) { + if (file_itm_data.chunks[c].codec != parquet::Compression::UNCOMPRESSED) { page_data[c].reset(); } } @@ -1711,17 +1709,16 @@ std::tuple reader::impl::prep // nesting information (sizes, etc) stored -per page- // note : even for flat schemas, we allocate 1 level of "nesting" info - allocate_nesting_info(file_data.chunks, file_data.pages_info, file_data.page_nesting_info); + allocate_nesting_info( + file_itm_data.chunks, file_itm_data.pages_info, file_itm_data.page_nesting_info); } - return {std::move(file_data), skip_rows, num_rows}; + return {skip_rows, num_rows}; } -table_with_metadata reader::impl::read_chunk_internal(gpu::file_intermediate_data& file_data, - gpu::chunked_intermediate_data& chunk_data, - gpu::chunked_read_info const& read_info, - bool uses_custom_row_bounds) +table_with_metadata reader::impl::read_chunk_internal(bool uses_custom_row_bounds) { + auto const& read_info = chunk_read_info[current_read_chunk]; table_metadata out_metadata; // output cudf columns as determined by the top level schema @@ -1729,17 +1726,17 @@ table_with_metadata reader::impl::read_chunk_internal(gpu::file_intermediate_dat out_columns.reserve(_output_columns.size()); // allocate outgoing columns - allocate_columns(file_data.chunks, - file_data.pages_info, - chunk_data, + allocate_columns(file_itm_data.chunks, + file_itm_data.pages_info, + chunk_itm_data, read_info.skip_rows, read_info.num_rows, uses_custom_row_bounds); // decoding column data - decode_page_data(file_data.chunks, - 
file_data.pages_info, - file_data.page_nesting_info, + decode_page_data(file_itm_data.chunks, + file_itm_data.pages_info, + file_itm_data.page_nesting_info, read_info.skip_rows, read_info.num_rows); @@ -1784,11 +1781,11 @@ table_with_metadata reader::impl::read(size_type skip_rows, bool uses_custom_row_bounds, std::vector> const& row_group_list) { - auto [file_data, skip_rows_corrected, num_rows_corrected] = + auto [skip_rows_corrected, num_rows_corrected] = preprocess_file(skip_rows, num_rows, row_group_list); // todo: fix this (empty output may be incorrect) - if (!file_data.has_data) { return table_with_metadata{}; } + if (!file_itm_data.has_data) { return table_with_metadata{}; } // - compute column sizes and allocate output buffers. // important: @@ -1802,19 +1799,17 @@ table_with_metadata reader::impl::read(size_type skip_rows, // TODO: make this a parameter. auto const chunked_read_size = 240000; // auto const chunked_read_size = 0; - preprocess_columns(file_data.chunks, - file_data.pages_info, + preprocess_columns(file_itm_data.chunks, + file_itm_data.pages_info, skip_rows_corrected, num_rows_corrected, uses_custom_row_bounds, chunked_read_size); - // process each chunk. this is the part that would be externalized into multiple calls - auto read_info = chunked_read_info[0]; - return read_chunk_internal(file_data, chunked_itm_data, read_info, uses_custom_row_bounds); + return read_chunk_internal(uses_custom_row_bounds); } -table_with_metadata reader::impl::read_chunk() { return table_with_metadata{}; } +table_with_metadata reader::impl::read_chunk() { return read_chunk_internal(false); } // Forward to implementation reader::reader(std::vector>&& sources, diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index d134b89252c..471d2de051f 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -97,13 +97,10 @@ class reader::impl { * @brief read_completed * @return */ - bool has_next() { return current_read_chunk >= chunked_read_info.size(); } + bool has_next() { return current_read_chunk >= chunk_read_info.size(); } private: - table_with_metadata read_chunk_internal(gpu::file_intermediate_data& file_data, - gpu::chunked_intermediate_data& chunk_data, - gpu::chunked_read_info const& read_info, - bool uses_custom_row_bounds); + table_with_metadata read_chunk_internal(bool uses_custom_row_bounds); /** * TODO @@ -111,7 +108,7 @@ class reader::impl { * @brief load_column_chunk_descriotions * @return */ - std::tuple preprocess_file( + std::pair preprocess_file( size_type skip_rows, size_type num_rows, std::vector> const& row_group_list); @@ -271,10 +268,11 @@ class reader::impl { data_type _timestamp_type{type_id::EMPTY}; // Variables used for chunked reading: - cudf::io::parquet::gpu::chunked_intermediate_data chunked_itm_data; - std::vector chunked_read_info; + cudf::io::parquet::gpu::file_intermediate_data file_itm_data; + cudf::io::parquet::gpu::chunked_intermediate_data chunk_itm_data; + std::vector chunk_read_info; std::size_t current_read_chunk{0}; - bool columns_preprocessed{false}; + bool preprocessed{false}; }; } // namespace parquet diff --git a/cpp/src/io/parquet/reader_preprocess.cu b/cpp/src/io/parquet/reader_preprocess.cu index 8bf69b43ea5..cd571d5b9d1 100644 --- a/cpp/src/io/parquet/reader_preprocess.cu +++ b/cpp/src/io/parquet/reader_preprocess.cu @@ -373,8 +373,6 @@ void reader::impl::preprocess_columns(hostdevice_vector& c bool uses_custom_row_bounds, size_type chunked_read_size) { - if 
(columns_preprocessed) { return; } - // iterate over all input columns and determine if they contain lists so we can further // preprocess them. bool has_lists = false; @@ -482,8 +480,8 @@ void reader::impl::preprocess_columns(hostdevice_vector& c chunked_read_size > 0 ? compute_splits(pages, id, num_rows, chunked_read_size, _stream) : std::vector{{min_row, num_rows}}; - chunked_itm_data = std::move(id); - chunked_read_info = std::move(read_chunks); + chunk_itm_data = std::move(id); + chunk_read_info = std::move(read_chunks); } void reader::impl::allocate_columns(hostdevice_vector& chunks, From a8dfd82c080f32284e985ad9188cea42d7a5f1cb Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 13 Oct 2022 15:11:01 -0700 Subject: [PATCH 039/162] Rename structs Signed-off-by: Nghia Truong --- cpp/src/io/parquet/parquet_gpu.hpp | 4 ++-- cpp/src/io/parquet/reader_impl.hpp | 6 +++--- cpp/src/io/parquet/reader_preprocess.cu | 16 ++++++++-------- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index f4207354831..69068169fb8 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -245,12 +245,12 @@ struct file_intermediate_data { }; // TODO: rename? -struct chunked_intermediate_data { +struct chunk_intermediate_data { rmm::device_uvector page_keys{0, rmm::cuda_stream_default}; rmm::device_uvector page_index{0, rmm::cuda_stream_default}; }; -struct chunked_read_info { +struct chunk_read_info { size_t skip_rows; size_t num_rows; }; diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index 471d2de051f..32cf6eca275 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -229,7 +229,7 @@ class reader::impl { */ void allocate_columns(hostdevice_vector& chunks, hostdevice_vector& pages, - gpu::chunked_intermediate_data const& id, + gpu::chunk_intermediate_data const& id, size_t min_row, size_t total_rows, bool uses_custom_row_bounds); @@ -269,8 +269,8 @@ class reader::impl { // Variables used for chunked reading: cudf::io::parquet::gpu::file_intermediate_data file_itm_data; - cudf::io::parquet::gpu::chunked_intermediate_data chunk_itm_data; - std::vector chunk_read_info; + cudf::io::parquet::gpu::chunk_intermediate_data chunk_itm_data; + std::vector chunk_read_info; std::size_t current_read_chunk{0}; bool preprocessed{false}; }; diff --git a/cpp/src/io/parquet/reader_preprocess.cu b/cpp/src/io/parquet/reader_preprocess.cu index cd571d5b9d1..9578a5126ba 100644 --- a/cpp/src/io/parquet/reader_preprocess.cu +++ b/cpp/src/io/parquet/reader_preprocess.cu @@ -119,8 +119,8 @@ struct row_total_size { } }; -std::vector compute_splits(hostdevice_vector& pages, - gpu::chunked_intermediate_data const& id, +std::vector compute_splits(hostdevice_vector& pages, + gpu::chunk_intermediate_data const& id, size_type num_rows, size_type chunked_read_size, rmm::cuda_stream_view stream) @@ -237,7 +237,7 @@ std::vector compute_splits(hostdevice_vector splits; + std::vector splits; { size_t cur_pos = 0; size_t cumulative_size = 0; @@ -261,7 +261,7 @@ std::vector compute_splits(hostdevice_vector& c } // intermediate data we will need for further chunked reads - gpu::chunked_intermediate_data id; + gpu::chunk_intermediate_data id; if (has_lists || chunked_read_size > 0) { // computes: // PageNestingInfo::size for each level of nesting, for each page. 
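// (annotation, an assumed sketch rather than code from this patch: once preprocess_columns() has
//  stored chunk_itm_data and chunk_read_info on the reader, producing the output chunks amounts
//  to pairing each {skip_rows, num_rows} entry with the shared intermediate data, roughly
//
//    for (auto const& info : chunk_read_info) {
//      allocate_columns(chunks, pages, chunk_itm_data, info.skip_rows, info.num_rows, ...);
//      decode_page_data(chunks, pages, page_nesting, info.skip_rows, info.num_rows);
//    }
//
//  read_chunk_internal() in reader_impl.cu performs one such iteration per call, indexing with
//  current_read_chunk and then incrementing it.)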
@@ -476,9 +476,9 @@ void reader::impl::preprocess_columns(hostdevice_vector& c } // compute splits if necessary. - std::vector read_chunks = + std::vector read_chunks = chunked_read_size > 0 ? compute_splits(pages, id, num_rows, chunked_read_size, _stream) - : std::vector{{min_row, num_rows}}; + : std::vector{{min_row, num_rows}}; chunk_itm_data = std::move(id); chunk_read_info = std::move(read_chunks); @@ -486,7 +486,7 @@ void reader::impl::preprocess_columns(hostdevice_vector& c void reader::impl::allocate_columns(hostdevice_vector& chunks, hostdevice_vector& pages, - gpu::chunked_intermediate_data const& id, + gpu::chunk_intermediate_data const& id, size_t min_row, size_t num_rows, bool uses_custom_row_bounds) From 7889e5a9b3f57734dcdb4eceb0d92d6af431ca22 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 13 Oct 2022 15:29:20 -0700 Subject: [PATCH 040/162] Increment `current_read_chunk` Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 84a98b7fb0d..5074c5ba15c 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1718,7 +1718,7 @@ std::pair reader::impl::preprocess_file( table_with_metadata reader::impl::read_chunk_internal(bool uses_custom_row_bounds) { - auto const& read_info = chunk_read_info[current_read_chunk]; + auto const& read_info = chunk_read_info[current_read_chunk++]; table_metadata out_metadata; // output cudf columns as determined by the top level schema From 63a65110ea8ba73de64cfba528cccc71e4c472aa Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 13 Oct 2022 16:33:41 -0700 Subject: [PATCH 041/162] Call preprocessing in `read_chunk` Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.cu | 37 ++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 5074c5ba15c..1d6f76498cd 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1718,6 +1718,10 @@ std::pair reader::impl::preprocess_file( table_with_metadata reader::impl::read_chunk_internal(bool uses_custom_row_bounds) { + if (!has_next()) { + // return empty + } + auto const& read_info = chunk_read_info[current_read_chunk++]; table_metadata out_metadata; @@ -1809,7 +1813,38 @@ table_with_metadata reader::impl::read(size_type skip_rows, return read_chunk_internal(uses_custom_row_bounds); } -table_with_metadata reader::impl::read_chunk() { return read_chunk_internal(false); } +table_with_metadata reader::impl::read_chunk() +{ + if (!preprocessed) { + [[maybe_unused]] auto [skip_rows_corrected, num_rows_corrected] = preprocess_file(0, -1, {}); + + // todo: fix this (empty output may be incorrect) + if (!file_itm_data.has_data) { return table_with_metadata{}; } + + // - compute column sizes and allocate output buffers. + // important: + // for nested schemas, we have to do some further preprocessing to determine: + // - real column output sizes per level of nesting (in a flat schema, there's only 1 level + // of + // nesting and it's size is the row count) + // + // - for nested schemas, output buffer offset values per-page, per nesting-level for the + // purposes of decoding. + // TODO: make this a parameter. 
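      // (annotation, not part of the patch: the hard-coded 240000 just below is the per-chunk
      //  output byte budget used while the chunked path is being wired up; the TODO presumably
      //  means replacing it with the byte limit already exposed on
      //  chunked_parquet_reader_options.)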
+ auto const chunked_read_size = 240000; + // auto const chunked_read_size = 0; + preprocess_columns(file_itm_data.chunks, + file_itm_data.pages_info, + skip_rows_corrected, + num_rows_corrected, + true /*uses_custom_row_bounds*/, + chunked_read_size); + + preprocessed = true; + } + + return read_chunk_internal(false); +} // Forward to implementation reader::reader(std::vector>&& sources, From c1269d1375b82bd0168453f9e5161d392a692436 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 14 Oct 2022 08:48:18 -0700 Subject: [PATCH 042/162] Fix `has_next` Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index 32cf6eca275..b74153c6cc0 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -97,7 +97,7 @@ class reader::impl { * @brief read_completed * @return */ - bool has_next() { return current_read_chunk >= chunk_read_info.size(); } + bool has_next() { return current_read_chunk < chunk_read_info.size(); } private: table_with_metadata read_chunk_internal(bool uses_custom_row_bounds); From 95e6c1da29995dfa4d635f682844acc7d0ac843d Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 14 Oct 2022 08:48:24 -0700 Subject: [PATCH 043/162] Refactoring Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.cu | 43 ++++++++++++++++--------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 1d6f76498cd..ca978f5e16d 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1583,6 +1583,8 @@ std::pair reader::impl::preprocess_file( return {skip_rows, num_rows}; } + // TODO: fix this. + // Need to check if the file actually has data. file_itm_data.has_data = true; // Descriptors for all the chunks that make up the selected columns @@ -1819,27 +1821,26 @@ table_with_metadata reader::impl::read_chunk() [[maybe_unused]] auto [skip_rows_corrected, num_rows_corrected] = preprocess_file(0, -1, {}); // todo: fix this (empty output may be incorrect) - if (!file_itm_data.has_data) { return table_with_metadata{}; } - - // - compute column sizes and allocate output buffers. - // important: - // for nested schemas, we have to do some further preprocessing to determine: - // - real column output sizes per level of nesting (in a flat schema, there's only 1 level - // of - // nesting and it's size is the row count) - // - // - for nested schemas, output buffer offset values per-page, per nesting-level for the - // purposes of decoding. - // TODO: make this a parameter. - auto const chunked_read_size = 240000; - // auto const chunked_read_size = 0; - preprocess_columns(file_itm_data.chunks, - file_itm_data.pages_info, - skip_rows_corrected, - num_rows_corrected, - true /*uses_custom_row_bounds*/, - chunked_read_size); - + if (file_itm_data.has_data) { + // - compute column sizes and allocate output buffers. + // important: + // for nested schemas, we have to do some further preprocessing to determine: + // - real column output sizes per level of nesting (in a flat schema, there's only 1 level + // of + // nesting and it's size is the row count) + // + // - for nested schemas, output buffer offset values per-page, per nesting-level for the + // purposes of decoding. + // TODO: make this a parameter. 
+ auto const chunked_read_size = 240000; + // auto const chunked_read_size = 0; + preprocess_columns(file_itm_data.chunks, + file_itm_data.pages_info, + skip_rows_corrected, + num_rows_corrected, + true /*uses_custom_row_bounds*/, + chunked_read_size); + } preprocessed = true; } From bd7b5101cfb77b5b0210cc1d7b78a587821b9bf6 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 14 Oct 2022 16:21:34 -0700 Subject: [PATCH 044/162] Fix errors Signed-off-by: Nghia Truong --- cpp/include/cudf/io/parquet.hpp | 15 ++++ cpp/src/io/functions.cpp | 7 ++ cpp/src/io/parquet/reader_impl.cu | 117 +++++++++++++++++++++++++++++ cpp/src/io/parquet/reader_impl.hpp | 2 +- cpp/tests/io/parquet_test.cpp | 81 ++++++++++++++++++-- 5 files changed, 214 insertions(+), 8 deletions(-) diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index 8a17f3344f0..337749a5ebe 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -415,6 +415,21 @@ class chunked_parquet_reader_options : public parquet_reader_options { friend class chunked_parquet_reader_options_builder; public: + /** + * @brief Default constructor. + * + * This has been added since Cython requires a default constructor to create objects on stack. + */ + explicit chunked_parquet_reader_options() = default; + + /** + * @brief Creates a parquet_reader_options_builder which will build parquet_reader_options. + * + * @param src Source information to read parquet file + * @return Builder to build reader options + */ + static chunked_parquet_reader_options_builder builder(source_info const& src); + /** * @brief Return the maximum number of bytes that will be read by * `chunked_parquet_reader::read()`. diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp index 58348220b8b..394cbac158b 100644 --- a/cpp/src/io/functions.cpp +++ b/cpp/src/io/functions.cpp @@ -86,6 +86,13 @@ parquet_reader_options_builder parquet_reader_options::builder(source_info const return parquet_reader_options_builder{src}; } +// Returns builder for parquet_reader_options +chunked_parquet_reader_options_builder chunked_parquet_reader_options::builder( + source_info const& src) +{ + return chunked_parquet_reader_options_builder{src}; +} + // Returns builder for parquet_writer_options parquet_writer_options_builder parquet_writer_options::builder(sink_info const& sink, table_view const& table) diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index ca978f5e16d..de33e48cf9f 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1357,10 +1357,16 @@ void reader::impl::decode_page_data(hostdevice_vector& chu size_t min_row, size_t total_rows) { + printf("line %d\n", __LINE__); + fflush(stdout); + auto is_dict_chunk = [](const gpu::ColumnChunkDesc& chunk) { return (chunk.data_type & 0x7) == BYTE_ARRAY && chunk.num_dict_pages > 0; }; + printf("line %d\n", __LINE__); + fflush(stdout); + // Count the number of string dictionary entries // NOTE: Assumes first page in the chunk is always the dictionary page size_t total_str_dict_indexes = 0; @@ -1374,6 +1380,9 @@ void reader::impl::decode_page_data(hostdevice_vector& chu auto str_dict_index = cudf::detail::make_zeroed_device_uvector_async( total_str_dict_indexes, _stream); + printf("line %d\n", __LINE__); + fflush(stdout); + // TODO (dm): hd_vec should have begin and end iterator members size_t sum_max_depths = std::accumulate(chunks.host_ptr(), @@ -1390,6 +1399,9 @@ void reader::impl::decode_page_data(hostdevice_vector& 
chu auto chunk_nested_data = hostdevice_vector(sum_max_depths, _stream); auto chunk_offsets = std::vector(); + printf("line %d\n", __LINE__); + fflush(stdout); + // Update chunks with pointers to column data. for (size_t c = 0, page_count = 0, str_ofs = 0, chunk_off = 0; c < chunks.size(); c++) { input_column_info const& input_col = _input_columns[chunks[c].src_col_index]; @@ -1468,19 +1480,34 @@ void reader::impl::decode_page_data(hostdevice_vector& chu page_count += chunks[c].max_num_pages; } + printf("line %d\n", __LINE__); + fflush(stdout); + chunks.host_to_device(_stream); chunk_nested_valids.host_to_device(_stream); chunk_nested_data.host_to_device(_stream); + printf("line %d\n", __LINE__); + fflush(stdout); + if (total_str_dict_indexes > 0) { gpu::BuildStringDictionaryIndex(chunks.device_ptr(), chunks.size(), _stream); } + printf("line %d\n", __LINE__); + fflush(stdout); + + printf("read total_rows = %d, min_row = %d\n", (int)total_rows, (int)min_row); + fflush(stdout); + gpu::DecodePageData(pages, chunks, total_rows, min_row, _stream); pages.device_to_host(_stream); page_nesting.device_to_host(_stream); _stream.synchronize(); + printf("line %d\n", __LINE__); + fflush(stdout); + // for list columns, add the final offset to every offset buffer. // TODO : make this happen in more efficiently. Maybe use thrust::for_each // on each buffer. Or potentially do it in PreprocessColumnData @@ -1514,6 +1541,9 @@ void reader::impl::decode_page_data(hostdevice_vector& chu } } + printf("line %d\n", __LINE__); + fflush(stdout); + // update null counts in the final column buffers for (size_t idx = 0; idx < pages.size(); idx++) { gpu::PageInfo* pi = &pages[idx]; @@ -1537,7 +1567,13 @@ void reader::impl::decode_page_data(hostdevice_vector& chu } } + printf("line %d\n", __LINE__); + fflush(stdout); + _stream.synchronize(); + + printf("line %d\n", __LINE__); + fflush(stdout); } reader::impl::impl(std::vector>&& sources, @@ -1731,6 +1767,9 @@ table_with_metadata reader::impl::read_chunk_internal(bool uses_custom_row_bound std::vector> out_columns; out_columns.reserve(_output_columns.size()); + printf("line %d\n", __LINE__); + fflush(stdout); + // allocate outgoing columns allocate_columns(file_itm_data.chunks, file_itm_data.pages_info, @@ -1739,6 +1778,11 @@ table_with_metadata reader::impl::read_chunk_internal(bool uses_custom_row_bound read_info.num_rows, uses_custom_row_bounds); + printf("line %d\n", __LINE__); + fflush(stdout); + + printf("read skip_rows = %d, num_rows = %d\n", (int)read_info.skip_rows, (int)read_info.num_rows); + // decoding column data decode_page_data(file_itm_data.chunks, file_itm_data.pages_info, @@ -1746,6 +1790,9 @@ table_with_metadata reader::impl::read_chunk_internal(bool uses_custom_row_bound read_info.skip_rows, read_info.num_rows); + printf("line %d\n", __LINE__); + fflush(stdout); + // create the final output cudf columns for (size_t i = 0; i < _output_columns.size(); ++i) { column_name_info& col_name = out_metadata.schema_info.emplace_back(""); @@ -1755,12 +1802,18 @@ table_with_metadata reader::impl::read_chunk_internal(bool uses_custom_row_bound out_columns.emplace_back(make_column(_output_columns[i], &col_name, metadata, _stream, _mr)); } + printf("line %d\n", __LINE__); + fflush(stdout); + return finalize_output(out_metadata, out_columns); } table_with_metadata reader::impl::finalize_output(table_metadata& out_metadata, std::vector>& out_columns) { + printf("line %d\n", __LINE__); + fflush(stdout); + // Create empty columns as needed (this can happen if we've ended 
up with no actual data to read) for (size_t i = out_columns.size(); i < _output_columns.size(); ++i) { column_name_info& col_name = out_metadata.schema_info.emplace_back(""); @@ -1779,6 +1832,9 @@ table_with_metadata reader::impl::finalize_output(table_metadata& out_metadata, out_metadata.user_data = {out_metadata.per_file_user_data[0].begin(), out_metadata.per_file_user_data[0].end()}; + printf("line %d\n", __LINE__); + fflush(stdout); + return {std::make_unique
(std::move(out_columns)), std::move(out_metadata)}; } @@ -1817,7 +1873,13 @@ table_with_metadata reader::impl::read(size_type skip_rows, table_with_metadata reader::impl::read_chunk() { + printf("line %d\n", __LINE__); + fflush(stdout); + if (!preprocessed) { + printf("line %d\n", __LINE__); + fflush(stdout); + [[maybe_unused]] auto [skip_rows_corrected, num_rows_corrected] = preprocess_file(0, -1, {}); // todo: fix this (empty output may be incorrect) @@ -1842,11 +1904,66 @@ table_with_metadata reader::impl::read_chunk() chunked_read_size); } preprocessed = true; + + printf("line %d\n", __LINE__); + fflush(stdout); } + printf("line %d\n", __LINE__); + fflush(stdout); return read_chunk_internal(false); } +bool reader::impl::has_next() +{ + printf("prepr: %d\n", (int)preprocessed); + printf("line %d\n", __LINE__); + fflush(stdout); + + if (!preprocessed) { + printf("line %d\n", __LINE__); + fflush(stdout); + [[maybe_unused]] auto [skip_rows_corrected, num_rows_corrected] = preprocess_file(0, -1, {}); + + printf("line %d\n", __LINE__); + fflush(stdout); + // todo: fix this (empty output may be incorrect) + if (file_itm_data.has_data) { + printf("line %d\n", __LINE__); + fflush(stdout); + + // - compute column sizes and allocate output buffers. + // important: + // for nested schemas, we have to do some further preprocessing to determine: + // - real column output sizes per level of nesting (in a flat schema, there's only 1 level + // of + // nesting and it's size is the row count) + // + // - for nested schemas, output buffer offset values per-page, per nesting-level for the + // purposes of decoding. + // TODO: make this a parameter. + auto const chunked_read_size = 240000; + // auto const chunked_read_size = 0; + preprocess_columns(file_itm_data.chunks, + file_itm_data.pages_info, + skip_rows_corrected, + num_rows_corrected, + true /*uses_custom_row_bounds*/, + chunked_read_size); + + printf("line %d\n", __LINE__); + fflush(stdout); + } + preprocessed = true; + printf("line %d\n", __LINE__); + fflush(stdout); + } + + printf("line %d\n", __LINE__); + fflush(stdout); + return current_read_chunk < chunk_read_info.size(); +} + // Forward to implementation reader::reader(std::vector>&& sources, parquet_reader_options const& options, diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index b74153c6cc0..a748f4e3550 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -97,7 +97,7 @@ class reader::impl { * @brief read_completed * @return */ - bool has_next() { return current_read_chunk < chunk_read_info.size(); } + bool has_next(); private: table_with_metadata read_chunk_internal(bool uses_custom_row_bounds); diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index 12b806ce788..ababfe1ad78 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -48,25 +48,92 @@ struct ParquetChunkedReaderTest : public cudf::test::BaseFixture { }; -TEST_F(ParquetChunkedReaderTest, Test) +// TEST_F(ParquetChunkedReaderTest, Test) +//{ +// std::mt19937 gen(6542); +// std::bernoulli_distribution bn(0.7f); +// auto values = thrust::make_counting_iterator(0); + +// constexpr cudf::size_type num_rows = 40000; +// cudf::test::fixed_width_column_wrapper a(values, values + num_rows); +// cudf::test::fixed_width_column_wrapper b(values, values + num_rows); + +// cudf::table_view t({a, b}); +// cudf::io::parquet_writer_options opts = cudf::io::parquet_writer_options::builder( +// 
cudf::io::sink_info{"/tmp/chunked_splits.parquet"}, t); +// cudf::io::write_parquet(opts); + +// cudf::io::parquet_reader_options in_opts = +// cudf::io::parquet_reader_options::builder(cudf::io::source_info{"/tmp/chunked_splits.parquet"}); +// auto result = cudf::io::read_parquet(in_opts); +// printf("\nResult size: %d\n\n", result.tbl->num_rows()); +//} + +TEST_F(ParquetChunkedReaderTest, TestChunkedRead) { std::mt19937 gen(6542); std::bernoulli_distribution bn(0.7f); - // auto valids = - // cudf::detail::make_counting_transform_iterator(0, [&](int index) { return bn(gen); }); auto values = thrust::make_counting_iterator(0); + printf("test, line %d\n\n", __LINE__); constexpr cudf::size_type num_rows = 40000; cudf::test::fixed_width_column_wrapper a(values, values + num_rows); cudf::test::fixed_width_column_wrapper b(values, values + num_rows); + printf("test, line %d\n\n", __LINE__); + cudf::table_view t({a, b}); cudf::io::parquet_writer_options opts = cudf::io::parquet_writer_options::builder( cudf::io::sink_info{"/tmp/chunked_splits.parquet"}, t); cudf::io::write_parquet(opts); - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{"/tmp/chunked_splits.parquet"}); - auto result = cudf::io::read_parquet(in_opts); - printf("result size: %d\n", result.tbl->num_rows()); + printf("test, line %d\n\n", __LINE__); + + cudf::io::chunked_parquet_reader_options in_opts = + cudf::io::chunked_parquet_reader_options::builder( + cudf::io::source_info{"/tmp/chunked_splits.parquet"}); + + printf("test, line %d\n\n", __LINE__); + fflush(stdout); + + cudf::io::chunked_parquet_reader reader(in_opts); + +#if 0 + printf("test, line %d\n\n", __LINE__); + fflush(stdout); + reader.has_next(); + + printf("test, line %d\n\n", __LINE__); + fflush(stdout); + reader.has_next(); + + printf("test, line %d\n\n", __LINE__); + fflush(stdout); + + { + auto result = reader.read_chunk(); + printf("Result size: %d\n", result.tbl->num_rows()); + fflush(stdout); + } + + { + auto result = reader.read_chunk(); + printf("Result size: %d\n", result.tbl->num_rows()); + fflush(stdout); + } + + printf("test, line %d\n\n", __LINE__); + fflush(stdout); + reader.has_next(); + +#else + int count{0}; + while (reader.has_next()) { + printf("\n\nhas next %d\n\n", count++); + fflush(stdout); + auto result = reader.read_chunk(); + printf("Result size: %d\n", result.tbl->num_rows()); + fflush(stdout); + } +#endif } From 66aeaf4c12b845d44bd66531cbc6abfd7d59669f Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 18 Oct 2022 12:55:03 -0700 Subject: [PATCH 045/162] Change param Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index de33e48cf9f..a9541cb0637 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1911,7 +1911,7 @@ table_with_metadata reader::impl::read_chunk() printf("line %d\n", __LINE__); fflush(stdout); - return read_chunk_internal(false); + return read_chunk_internal(true); } bool reader::impl::has_next() From 1d700e3c28c29697c86d80bc13bb581b8197c16a Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 18 Oct 2022 13:06:51 -0700 Subject: [PATCH 046/162] Rename variables Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.cu | 74 ++++++++++++------------- cpp/src/io/parquet/reader_impl.hpp | 10 ++-- cpp/src/io/parquet/reader_preprocess.cu | 12 ++-- 3 files changed, 48 
insertions(+), 48 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index a9541cb0637..fc949b5b013 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1621,12 +1621,12 @@ std::pair reader::impl::preprocess_file( // TODO: fix this. // Need to check if the file actually has data. - file_itm_data.has_data = true; + _file_itm_data.has_data = true; // Descriptors for all the chunks that make up the selected columns const auto num_input_columns = _input_columns.size(); const auto num_chunks = selected_row_groups.size() * num_input_columns; - file_itm_data.chunks = hostdevice_vector(0, num_chunks, _stream); + _file_itm_data.chunks = hostdevice_vector(0, num_chunks, _stream); // Association between each column chunk and its source std::vector chunk_source_map(num_chunks); @@ -1646,7 +1646,7 @@ std::pair reader::impl::preprocess_file( auto const row_group_start = rg.start_row; auto const row_group_source = rg.source_index; auto const row_group_rows = std::min(remaining_rows, row_group.num_rows); - auto const io_chunk_idx = file_itm_data.chunks.size(); + auto const io_chunk_idx = _file_itm_data.chunks.size(); // generate ColumnChunkDesc objects for everything to be decoded (all input columns) for (size_t i = 0; i < num_input_columns; ++i) { @@ -1662,12 +1662,12 @@ std::pair reader::impl::preprocess_file( schema.converted_type, schema.type_length); - column_chunk_offsets[file_itm_data.chunks.size()] = + column_chunk_offsets[_file_itm_data.chunks.size()] = (col_meta.dictionary_page_offset != 0) ? std::min(col_meta.data_page_offset, col_meta.dictionary_page_offset) : col_meta.data_page_offset; - file_itm_data.chunks.push_back( + _file_itm_data.chunks.push_back( gpu::ColumnChunkDesc(col_meta.total_compressed_size, nullptr, col_meta.num_values, @@ -1689,7 +1689,7 @@ std::pair reader::impl::preprocess_file( col.schema_idx)); // Map each column chunk to its column index and its source index - chunk_source_map[file_itm_data.chunks.size() - 1] = row_group_source; + chunk_source_map[_file_itm_data.chunks.size() - 1] = row_group_source; if (col_meta.codec != Compression::UNCOMPRESSED) { total_decompressed_size += col_meta.total_uncompressed_size; @@ -1697,9 +1697,9 @@ std::pair reader::impl::preprocess_file( } // Read compressed chunk data to device memory read_rowgroup_tasks.push_back(read_column_chunks(page_data, - file_itm_data.chunks, + _file_itm_data.chunks, io_chunk_idx, - file_itm_data.chunks.size(), + _file_itm_data.chunks.size(), column_chunk_offsets, chunk_source_map)); @@ -1711,19 +1711,19 @@ std::pair reader::impl::preprocess_file( assert(remaining_rows <= 0); // Process dataset chunk pages into output columns - const auto total_pages = count_page_headers(file_itm_data.chunks); - file_itm_data.pages_info = hostdevice_vector(total_pages, total_pages, _stream); + const auto total_pages = count_page_headers(_file_itm_data.chunks); + _file_itm_data.pages_info = hostdevice_vector(total_pages, total_pages, _stream); if (total_pages > 0) { rmm::device_buffer decomp_page_data; // decoding of column/page information - decode_page_headers(file_itm_data.chunks, file_itm_data.pages_info); + decode_page_headers(_file_itm_data.chunks, _file_itm_data.pages_info); if (total_decompressed_size > 0) { - decomp_page_data = decompress_page_data(file_itm_data.chunks, file_itm_data.pages_info); + decomp_page_data = decompress_page_data(_file_itm_data.chunks, _file_itm_data.pages_info); // Free compressed data - for (size_t c = 0; c < 
file_itm_data.chunks.size(); c++) { - if (file_itm_data.chunks[c].codec != parquet::Compression::UNCOMPRESSED) { + for (size_t c = 0; c < _file_itm_data.chunks.size(); c++) { + if (_file_itm_data.chunks[c].codec != parquet::Compression::UNCOMPRESSED) { page_data[c].reset(); } } @@ -1748,7 +1748,7 @@ std::pair reader::impl::preprocess_file( // note : even for flat schemas, we allocate 1 level of "nesting" info allocate_nesting_info( - file_itm_data.chunks, file_itm_data.pages_info, file_itm_data.page_nesting_info); + _file_itm_data.chunks, _file_itm_data.pages_info, _file_itm_data.page_nesting_info); } return {skip_rows, num_rows}; @@ -1760,7 +1760,7 @@ table_with_metadata reader::impl::read_chunk_internal(bool uses_custom_row_bound // return empty } - auto const& read_info = chunk_read_info[current_read_chunk++]; + auto const& read_info = _chunk_read_info[_current_read_chunk++]; table_metadata out_metadata; // output cudf columns as determined by the top level schema @@ -1771,9 +1771,9 @@ table_with_metadata reader::impl::read_chunk_internal(bool uses_custom_row_bound fflush(stdout); // allocate outgoing columns - allocate_columns(file_itm_data.chunks, - file_itm_data.pages_info, - chunk_itm_data, + allocate_columns(_file_itm_data.chunks, + _file_itm_data.pages_info, + _chunk_itm_data, read_info.skip_rows, read_info.num_rows, uses_custom_row_bounds); @@ -1784,9 +1784,9 @@ table_with_metadata reader::impl::read_chunk_internal(bool uses_custom_row_bound printf("read skip_rows = %d, num_rows = %d\n", (int)read_info.skip_rows, (int)read_info.num_rows); // decoding column data - decode_page_data(file_itm_data.chunks, - file_itm_data.pages_info, - file_itm_data.page_nesting_info, + decode_page_data(_file_itm_data.chunks, + _file_itm_data.pages_info, + _file_itm_data.page_nesting_info, read_info.skip_rows, read_info.num_rows); @@ -1847,7 +1847,7 @@ table_with_metadata reader::impl::read(size_type skip_rows, preprocess_file(skip_rows, num_rows, row_group_list); // todo: fix this (empty output may be incorrect) - if (!file_itm_data.has_data) { return table_with_metadata{}; } + if (!_file_itm_data.has_data) { return table_with_metadata{}; } // - compute column sizes and allocate output buffers. // important: @@ -1861,8 +1861,8 @@ table_with_metadata reader::impl::read(size_type skip_rows, // TODO: make this a parameter. auto const chunked_read_size = 240000; // auto const chunked_read_size = 0; - preprocess_columns(file_itm_data.chunks, - file_itm_data.pages_info, + preprocess_columns(_file_itm_data.chunks, + _file_itm_data.pages_info, skip_rows_corrected, num_rows_corrected, uses_custom_row_bounds, @@ -1876,14 +1876,14 @@ table_with_metadata reader::impl::read_chunk() printf("line %d\n", __LINE__); fflush(stdout); - if (!preprocessed) { + if (!_file_preprocessed) { printf("line %d\n", __LINE__); fflush(stdout); [[maybe_unused]] auto [skip_rows_corrected, num_rows_corrected] = preprocess_file(0, -1, {}); // todo: fix this (empty output may be incorrect) - if (file_itm_data.has_data) { + if (_file_itm_data.has_data) { // - compute column sizes and allocate output buffers. // important: // for nested schemas, we have to do some further preprocessing to determine: @@ -1896,14 +1896,14 @@ table_with_metadata reader::impl::read_chunk() // TODO: make this a parameter. 
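To make the per-page, per nesting-level bookkeeping described in the comment above concrete, here is a small worked example (hypothetical data, illustrative only, not taken from this patch):

    A LIST<INT32> column with four rows  [0,1]  [2]  []  [3,4,5]  split across two pages:
      page 0 holds rows {[0,1], [2]}        page 1 holds rows {[], [3,4,5]}
    The preprocessing pass records, per page and per nesting depth:
      depth 0 (rows):          page 0 size = 2,   page 1 size = 2
      depth 1 (leaf values):   page 0 size = 3,   page 1 size = 3
    Summing within a depth gives the output buffer sizes (4 rows, 6 leaf values, with one
    extra terminating entry for the offsets buffer), and an exclusive scan of the per-page
    sizes within each depth gives every page its starting output offset (page_start_value):
      depth 0 -> {0, 2}        depth 1 -> {0, 3}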
auto const chunked_read_size = 240000; // auto const chunked_read_size = 0; - preprocess_columns(file_itm_data.chunks, - file_itm_data.pages_info, + preprocess_columns(_file_itm_data.chunks, + _file_itm_data.pages_info, skip_rows_corrected, num_rows_corrected, true /*uses_custom_row_bounds*/, chunked_read_size); } - preprocessed = true; + _file_preprocessed = true; printf("line %d\n", __LINE__); fflush(stdout); @@ -1916,11 +1916,11 @@ table_with_metadata reader::impl::read_chunk() bool reader::impl::has_next() { - printf("prepr: %d\n", (int)preprocessed); + printf("prepr: %d\n", (int)_file_preprocessed); printf("line %d\n", __LINE__); fflush(stdout); - if (!preprocessed) { + if (!_file_preprocessed) { printf("line %d\n", __LINE__); fflush(stdout); [[maybe_unused]] auto [skip_rows_corrected, num_rows_corrected] = preprocess_file(0, -1, {}); @@ -1928,7 +1928,7 @@ bool reader::impl::has_next() printf("line %d\n", __LINE__); fflush(stdout); // todo: fix this (empty output may be incorrect) - if (file_itm_data.has_data) { + if (_file_itm_data.has_data) { printf("line %d\n", __LINE__); fflush(stdout); @@ -1944,8 +1944,8 @@ bool reader::impl::has_next() // TODO: make this a parameter. auto const chunked_read_size = 240000; // auto const chunked_read_size = 0; - preprocess_columns(file_itm_data.chunks, - file_itm_data.pages_info, + preprocess_columns(_file_itm_data.chunks, + _file_itm_data.pages_info, skip_rows_corrected, num_rows_corrected, true /*uses_custom_row_bounds*/, @@ -1954,14 +1954,14 @@ bool reader::impl::has_next() printf("line %d\n", __LINE__); fflush(stdout); } - preprocessed = true; + _file_preprocessed = true; printf("line %d\n", __LINE__); fflush(stdout); } printf("line %d\n", __LINE__); fflush(stdout); - return current_read_chunk < chunk_read_info.size(); + return _current_read_chunk < _chunk_read_info.size(); } // Forward to implementation diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index a748f4e3550..f26a407b7c5 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -268,11 +268,11 @@ class reader::impl { data_type _timestamp_type{type_id::EMPTY}; // Variables used for chunked reading: - cudf::io::parquet::gpu::file_intermediate_data file_itm_data; - cudf::io::parquet::gpu::chunk_intermediate_data chunk_itm_data; - std::vector chunk_read_info; - std::size_t current_read_chunk{0}; - bool preprocessed{false}; + cudf::io::parquet::gpu::file_intermediate_data _file_itm_data; + cudf::io::parquet::gpu::chunk_intermediate_data _chunk_itm_data; + std::vector _chunk_read_info; + std::size_t _current_read_chunk{0}; + bool _file_preprocessed{false}; }; } // namespace parquet diff --git a/cpp/src/io/parquet/reader_preprocess.cu b/cpp/src/io/parquet/reader_preprocess.cu index 9578a5126ba..135241eb99a 100644 --- a/cpp/src/io/parquet/reader_preprocess.cu +++ b/cpp/src/io/parquet/reader_preprocess.cu @@ -120,10 +120,10 @@ struct row_total_size { }; std::vector compute_splits(hostdevice_vector& pages, - gpu::chunk_intermediate_data const& id, - size_type num_rows, - size_type chunked_read_size, - rmm::cuda_stream_view stream) + gpu::chunk_intermediate_data const& id, + size_type num_rows, + size_type chunked_read_size, + rmm::cuda_stream_view stream) { auto const& page_keys = id.page_keys; auto const& page_index = id.page_index; @@ -480,8 +480,8 @@ void reader::impl::preprocess_columns(hostdevice_vector& c chunked_read_size > 0 ? 
compute_splits(pages, id, num_rows, chunked_read_size, _stream) : std::vector{{min_row, num_rows}}; - chunk_itm_data = std::move(id); - chunk_read_info = std::move(read_chunks); + _chunk_itm_data = std::move(id); + _chunk_read_info = std::move(read_chunks); } void reader::impl::allocate_columns(hostdevice_vector& chunks, From 4af948b2e8234ea64ff56b838abb8aef87c9d7ed Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 18 Oct 2022 13:09:48 -0700 Subject: [PATCH 047/162] Remove intermediate variables Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_preprocess.cu | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/cpp/src/io/parquet/reader_preprocess.cu b/cpp/src/io/parquet/reader_preprocess.cu index 135241eb99a..2c57aacf201 100644 --- a/cpp/src/io/parquet/reader_preprocess.cu +++ b/cpp/src/io/parquet/reader_preprocess.cu @@ -396,7 +396,6 @@ void reader::impl::preprocess_columns(hostdevice_vector& c } // intermediate data we will need for further chunked reads - gpu::chunk_intermediate_data id; if (has_lists || chunked_read_size > 0) { // computes: // PageNestingInfo::size for each level of nesting, for each page. @@ -452,10 +451,10 @@ void reader::impl::preprocess_columns(hostdevice_vector& c // 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3 // // We also need to preserve key-relative page ordering, so we need to use a stable sort. - id.page_keys = rmm::device_uvector(pages.size(), _stream); - id.page_index = rmm::device_uvector(pages.size(), _stream); - auto& page_keys = id.page_keys; - auto& page_index = id.page_index; + _chunk_itm_data.page_keys = rmm::device_uvector(pages.size(), _stream); + _chunk_itm_data.page_index = rmm::device_uvector(pages.size(), _stream); + auto& page_keys = _chunk_itm_data.page_keys; + auto& page_index = _chunk_itm_data.page_index; { thrust::transform(rmm::exec_policy(_stream), pages.device_ptr(), @@ -476,12 +475,10 @@ void reader::impl::preprocess_columns(hostdevice_vector& c } // compute splits if necessary. - std::vector read_chunks = - chunked_read_size > 0 ? compute_splits(pages, id, num_rows, chunked_read_size, _stream) - : std::vector{{min_row, num_rows}}; - - _chunk_itm_data = std::move(id); - _chunk_read_info = std::move(read_chunks); + _chunk_read_info = + chunked_read_size > 0 + ? 
compute_splits(pages, _chunk_itm_data, num_rows, chunked_read_size, _stream) + : std::vector{{min_row, num_rows}}; } void reader::impl::allocate_columns(hostdevice_vector& chunks, From 28cfc6fa5ac87ecb805d8511786bdf80cb405b80 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 18 Oct 2022 13:42:44 -0700 Subject: [PATCH 048/162] Modify tests Signed-off-by: Nghia Truong --- cpp/tests/io/parquet_test.cpp | 75 +++++++++++------------------------ 1 file changed, 24 insertions(+), 51 deletions(-) diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index ababfe1ad78..ea4ad22c833 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -48,27 +48,29 @@ struct ParquetChunkedReaderTest : public cudf::test::BaseFixture { }; -// TEST_F(ParquetChunkedReaderTest, Test) -//{ -// std::mt19937 gen(6542); -// std::bernoulli_distribution bn(0.7f); -// auto values = thrust::make_counting_iterator(0); - -// constexpr cudf::size_type num_rows = 40000; -// cudf::test::fixed_width_column_wrapper a(values, values + num_rows); -// cudf::test::fixed_width_column_wrapper b(values, values + num_rows); - -// cudf::table_view t({a, b}); -// cudf::io::parquet_writer_options opts = cudf::io::parquet_writer_options::builder( -// cudf::io::sink_info{"/tmp/chunked_splits.parquet"}, t); -// cudf::io::write_parquet(opts); - -// cudf::io::parquet_reader_options in_opts = -// cudf::io::parquet_reader_options::builder(cudf::io::source_info{"/tmp/chunked_splits.parquet"}); -// auto result = cudf::io::read_parquet(in_opts); -// printf("\nResult size: %d\n\n", result.tbl->num_rows()); -//} +#if 0 +TEST_F(ParquetChunkedReaderTest, Test) +{ + std::mt19937 gen(6542); + std::bernoulli_distribution bn(0.7f); + auto values = thrust::make_counting_iterator(0); + constexpr cudf::size_type num_rows = 40000; + cudf::test::fixed_width_column_wrapper a(values, values + num_rows); + cudf::test::fixed_width_column_wrapper b(values, values + num_rows); + + cudf::table_view t({a, b}); + cudf::io::parquet_writer_options opts = cudf::io::parquet_writer_options::builder( + cudf::io::sink_info{"/tmp/chunked_splits.parquet"}, t); + cudf::io::write_parquet(opts); + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{"/tmp/chunked_splits.parquet"}); + auto result = cudf::io::read_parquet(in_opts); + printf("\nResult size read all: %d\n\n", result.tbl->num_rows()); +} + +#else TEST_F(ParquetChunkedReaderTest, TestChunkedRead) { std::mt19937 gen(6542); @@ -98,42 +100,13 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedRead) cudf::io::chunked_parquet_reader reader(in_opts); -#if 0 - printf("test, line %d\n\n", __LINE__); - fflush(stdout); - reader.has_next(); - - printf("test, line %d\n\n", __LINE__); - fflush(stdout); - reader.has_next(); - - printf("test, line %d\n\n", __LINE__); - fflush(stdout); - - { - auto result = reader.read_chunk(); - printf("Result size: %d\n", result.tbl->num_rows()); - fflush(stdout); - } - - { - auto result = reader.read_chunk(); - printf("Result size: %d\n", result.tbl->num_rows()); - fflush(stdout); - } - - printf("test, line %d\n\n", __LINE__); - fflush(stdout); - reader.has_next(); - -#else int count{0}; while (reader.has_next()) { printf("\n\nhas next %d\n\n", count++); fflush(stdout); auto result = reader.read_chunk(); - printf("Result size: %d\n", result.tbl->num_rows()); + printf("Result size: %d\n\n\n\n\n", result.tbl->num_rows()); fflush(stdout); } -#endif } +#endif From 8135ed5a9c9062518eeadf903be8aa54e44aebee 
Mon Sep 17 00:00:00 2001 From: Dave Baranec Date: Tue, 18 Oct 2022 15:50:33 -0500 Subject: [PATCH 049/162] First pass of string support. --- cpp/src/io/parquet/page_data.cu | 207 ++++++++++++++++++++---- cpp/src/io/parquet/page_hdr.cu | 1 + cpp/src/io/parquet/parquet_gpu.hpp | 13 +- cpp/src/io/parquet/reader_impl.cu | 39 +---- cpp/src/io/parquet/reader_impl.hpp | 1 - cpp/src/io/parquet/reader_preprocess.cu | 147 ++++++++++++++--- 6 files changed, 318 insertions(+), 90 deletions(-) diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index 8a7773da174..c0dc5eeea9e 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -460,8 +460,8 @@ __device__ void gpuInitStringDescriptors(volatile page_state_s* s, int target_po } } - -inline __device__ std::pair gpuGetStringData(volatile page_state_s* s, int src_pos) +inline __device__ std::pair gpuGetStringData(volatile page_state_s* s, + int src_pos) { const char* ptr = nullptr; size_t len = 0; @@ -492,16 +492,14 @@ inline __device__ std::pair gpuGetStringData(volatile page_ * @brief Get the length of a string * * @param[in,out] s Page state input/output - * @param[in] src_pos Source position - * + * @param[in] src_pos Source position + * * @return The length of the string */ inline __device__ size_type gpuGetStringSize(volatile page_state_s* s, int src_pos) { - if(s->dtype_len == 4){ - return 4; - } - auto [ptr, len] = gpuGetStringData(s, src_pos); + if (s->dtype_len == 4) { return 4; } + auto [_, len] = gpuGetStringData(s, src_pos); return len; } @@ -1053,6 +1051,7 @@ static __device__ bool setupLocalPageInfo(page_state_s* const s, s->page.skipped_leaf_values = 0; s->input_value_count = 0; s->input_row_count = 0; + s->input_leaf_count = 0; s->row_index_lower_bound = -1; } @@ -1408,7 +1407,7 @@ __device__ void gpuDecodeLevels(page_state_s* s, int32_t target_leaf_count, int * @param[in] bounds_set Whether or not s->row_index_lower_bound, s->first_row and s->num_rows * have been computed for this page (they will only be set in the second/trim pass). */ -template +template static __device__ void gpuUpdatePageSizes(page_state_s* s, int32_t target_input_value_count, int t, @@ -1428,17 +1427,16 @@ static __device__ void gpuUpdatePageSizes(page_state_s* s, get_nesting_bounds( start_depth, end_depth, d, s, input_value_count, target_input_value_count, t); - // count rows and leaf values - int const is_new_row = start_depth == 0 ? 1 : 0; - uint32_t const warp_row_count_mask = ballot(is_new_row); - int const is_new_leaf = (d >= s->page.nesting[max_depth - 1].max_def_level) ? 1 : 0; + int const is_new_row = start_depth == 0 ? 1 : 0; + uint32_t const warp_row_count_mask = ballot(is_new_row); + int const is_new_leaf = (d >= s->page.nesting[max_depth - 1].max_def_level) ? 1 : 0; uint32_t const warp_leaf_count_mask = ballot(is_new_leaf); // is this thread within row bounds? on the first pass we don't know the bounds, so we will be // computing the full size of the column. on the second pass, we will know our actual row // bounds, so the computation will cap sizes properly. 
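The row and leaf counting in this kernel relies on a warp-voting idiom that is easy to miss. A standalone sketch of it follows, using the raw CUDA intrinsics __ballot_sync and __popc instead of the ballot() helper this file uses, and operating on a plain input array rather than decoded repetition/definition levels (names and launch shape are illustrative, not the actual implementation):

    // Launched with a single warp of 32 threads. Each lane votes whether its element
    // starts a new row; the vote mask then yields both the warp-wide count and each
    // voting lane's exclusive offset among the voters.
    __global__ void warp_count_sketch(int const* start_depth, int n, int* row_count, int* offsets)
    {
      int const lane       = threadIdx.x;  // 0..31, single warp assumed
      int const is_new_row = (lane < n && start_depth[lane] == 0) ? 1 : 0;
      unsigned const mask  = __ballot_sync(0xffffffffu, is_new_row);  // one vote bit per lane
      // exclusive offset of this lane among the voters: the same
      // __popc(mask & ((1 << t) - 1)) trick used for per-thread source positions
      int const my_offset  = __popc(mask & ((1u << lane) - 1u));
      if (is_new_row) { offsets[lane] = my_offset; }
      if (lane == 0) { *row_count = __popc(mask); }  // total votes in the warp
    }

The real kernel does this batch by batch while decoding level streams, and uses the analogous leaf-vote mask to derive each thread's src_pos before summing the fetched string lengths with cub::WarpReduce.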
- int in_row_bounds = 1; - auto const first_thread_in_range = [&](){ + int in_row_bounds = 1; + auto const first_thread_in_range = [&]() { if (bounds_set) { // absolute row index int32_t thread_row_index = @@ -1462,30 +1460,47 @@ static __device__ void gpuUpdatePageSizes(page_state_s* s, } return first_thread_in_range; - } + } return 0; }(); // increment counts across all nesting depths for (int s_idx = 0; s_idx < max_depth; s_idx++) { // if we are within the range of nesting levels we should be adding value indices for - int const in_nesting_bounds = (s_idx >= start_depth && s_idx <= end_depth && in_row_bounds) ? 1 : 0; - + int const in_nesting_bounds = + (s_idx >= start_depth && s_idx <= end_depth && in_row_bounds) ? 1 : 0; uint32_t const count_mask = ballot(in_nesting_bounds); - if (!t) { s->page.nesting[s_idx].size += __popc(count_mask); } - } + if (!t) { + s->page.nesting[s_idx].size += __popc(count_mask); + // printf("New size (%d): %d\n", s_idx, s->page.nesting[s_idx].size); + } - // if this is a leaf in a string column, add the size - /* - if constexpr(is_string_column){ - if(is_new_leaf){ - int const src_pos = input_leaf_count + __popc(warp_leaf_count_mask & ((1 << first_thread_in_range) - 1)); - auto const len = gpuGetStringSize(s, src_pos); - auto const len = 4; - if (!t) { s->page.str_bytes += len; } + // string lengths, if applicable + if constexpr (is_string_column) { + if (s_idx == max_depth - 1) { + // string len for each thread + size_type const str_len = [&]() { + if (is_new_leaf) { + int const src_pos = input_leaf_count + __popc(warp_leaf_count_mask & ((1 << t) - 1)); + auto const str_len = gpuGetStringSize(s, src_pos); + // printf("S(%d): len(%d), src_pos(%d), input_leaf_count(%d)\n", t, str_len, src_pos, + // input_leaf_count); + return str_len; + } + return 0; + }(); + + // sum sizes from all threads. + using warp_reduce = cub::WarpReduce; + __shared__ typename warp_reduce::TempStorage temp_storage[1]; + size_type warp_total_str_len = warp_reduce(temp_storage[0]).Sum(str_len); + if (!t) { + s->page.str_bytes += warp_total_str_len; + // printf("STR BYTES: %d\n", s->page.str_bytes); + } + } } } - */ input_value_count += min(32, (target_input_value_count - input_value_count)); input_row_count += __popc(warp_row_count_mask); @@ -1500,6 +1515,7 @@ static __device__ void gpuUpdatePageSizes(page_state_s* s, } } +#if 0 /** * @brief Kernel for computing per-page column size information for all nesting levels. * @@ -1536,6 +1552,10 @@ __global__ void __launch_bounds__(block_size) bool const has_repetition = chunks[pp->chunk_idx].max_level[level_type::REPETITION] > 0; bool const is_string_column = (s->col.data_type & 7) == BYTE_ARRAY && s->dtype_len != 4; + if(!t){ + printf("is_string_column: %d\n", (int)is_string_column); + } + // if this is a flat hierarchy (no lists) and is not a string column, compute the size directly from the number of values. if (!has_repetition && !is_string_column) { if (!t) { @@ -1575,7 +1595,7 @@ __global__ void __launch_bounds__(block_size) if (t < 32) { constexpr int batch_size = 32; int target_input_count = batch_size; - while (!s->error && s->input_value_count < s->num_input_values) { + while (!s->error && s->input_value_count < s->num_input_values) { // decode repetition and definition levels. these will attempt to decode at // least up to the target, but may decode a few more. 
if(has_repetition){ @@ -1586,7 +1606,7 @@ __global__ void __launch_bounds__(block_size) // we may have decoded different amounts from each stream, so only process what we've been int actual_input_count = has_repetition ? min(s->lvl_count[level_type::REPETITION], - s->lvl_count[level_type::DEFINITION]) + s->lvl_count[level_type::DEFINITION]) : s->lvl_count[level_type::DEFINITION]; // process what we got back @@ -1595,6 +1615,7 @@ __global__ void __launch_bounds__(block_size) } else { gpuUpdatePageSizes(s, actual_input_count, t, trim_pass); } + target_input_count = actual_input_count + batch_size; __syncwarp(); } @@ -1606,6 +1627,130 @@ __global__ void __launch_bounds__(block_size) pp->skipped_leaf_values = s->page.skipped_leaf_values; } } +#endif + +/** + * @brief Kernel for computing per-page column size information for all nesting levels. + * + * This function will write out the size field for each level of nesting. + * + * @param pages List of pages + * @param chunks List of column chunks + * @param min_row Row index to start reading at + * @param num_rows Maximum number of rows to read. Pass as INT_MAX to guarantee reading all rows. + * @param trim_pass Whether or not this is the trim pass. We first have to compute + * the full size information of every page before we come through in a second (trim) pass + * to determine what subset of rows in this page we should be reading. + */ +__global__ void __launch_bounds__(block_size) + gpuComputePageSizes(PageInfo* pages, + device_span chunks, + size_t min_row, + size_t num_rows, + bool trim_pass) +{ + __shared__ __align__(16) page_state_s state_g; + + page_state_s* const s = &state_g; + int page_idx = blockIdx.x; + int t = threadIdx.x; + PageInfo* pp = &pages[page_idx]; + + if (!setupLocalPageInfo(s, pp, chunks, trim_pass ? min_row : 0, trim_pass ? num_rows : INT_MAX)) { + return; + } + + // we only need to preprocess hierarchies with repetition in them (ie, hierarchies + // containing lists anywhere within). + bool const has_repetition = chunks[pp->chunk_idx].max_level[level_type::REPETITION] > 0; + bool const is_string_column = (s->col.data_type & 7) == BYTE_ARRAY && s->dtype_len != 4; + + // if this is a flat hierarchy (no lists) and is not a string column, compute the size directly + // from the number of values. + if (!has_repetition && !is_string_column) { + if (!t) { + // note: doing this for all nesting level because we can still have structs even if we don't + // have lists. + for (size_type idx = 0; idx < pp->num_nesting_levels; idx++) { + pp->nesting[idx].size = pp->num_input_values; + } + } + return; + } + + // zero sizes + int d = 0; + while (d < s->page.num_nesting_levels) { + if (d + t < s->page.num_nesting_levels) { s->page.nesting[d + t].size = 0; } + d += blockDim.x; + } + if (!t) { + s->page.skipped_values = -1; + s->page.skipped_leaf_values = -1; + s->page.str_bytes = 0; + s->input_row_count = 0; + s->input_value_count = 0; + + // if this isn't the trim pass, make sure we visit absolutely everything + if (!trim_pass) { + s->first_row = 0; + s->num_rows = INT_MAX; + s->row_index_lower_bound = -1; + } + } + __syncthreads(); + + // optimization : it might be useful to have a version of gpuDecodeStream that could go wider than + // 1 warp. Currently it only uses 1 warp so that it can overlap work with the value decoding step + // when in the actual value decoding kernel. 
However, during this preprocess step we have no such + // limits - we could go as wide as block_size + if (t < 32) { + constexpr int batch_size = 32; + int target_input_count = batch_size; + while (!s->error && s->input_value_count < s->num_input_values) { + // decode repetition and definition levels. these will attempt to decode at + // least up to the target, but may decode a few more. + if (has_repetition) { + gpuDecodeStream(s->rep, s, target_input_count, t, level_type::REPETITION); + } + gpuDecodeStream(s->def, s, target_input_count, t, level_type::DEFINITION); + __syncwarp(); + + // we may have decoded different amounts from each stream, so only process what we've been + int actual_input_count = has_repetition ? min(s->lvl_count[level_type::REPETITION], + s->lvl_count[level_type::DEFINITION]) + : s->lvl_count[level_type::DEFINITION]; + actual_input_count = min(actual_input_count, s->num_input_values); + + // process what we got back + if (is_string_column) { + auto src_target_pos = target_input_count; + // TODO: compute this in another warp like the decode step does + if (s->dict_base) { + src_target_pos = gpuDecodeDictionaryIndices(s, src_target_pos, t); + } else if ((s->col.data_type & 7) == BYTE_ARRAY) { + gpuInitStringDescriptors(s, src_target_pos, t); + } + if (!t) { *(volatile int32_t*)&s->dict_pos = src_target_pos; } + + gpuUpdatePageSizes(s, actual_input_count, t, trim_pass); + } else { + gpuUpdatePageSizes(s, actual_input_count, t, trim_pass); + } + + // target_input_count = actual_input_count + batch_size; + target_input_count += batch_size; + __syncwarp(); + } + } + // update # rows in the actual page + if (!t) { + pp->num_rows = s->page.nesting[0].size; + pp->skipped_values = s->page.skipped_values; + pp->skipped_leaf_values = s->page.skipped_leaf_values; + pp->str_bytes = s->page.str_bytes; + } +} /** * @brief Kernel for co the column data stored in the pages diff --git a/cpp/src/io/parquet/page_hdr.cu b/cpp/src/io/parquet/page_hdr.cu index e7856a871c1..e4122b37b19 100644 --- a/cpp/src/io/parquet/page_hdr.cu +++ b/cpp/src/io/parquet/page_hdr.cu @@ -366,6 +366,7 @@ __global__ void __launch_bounds__(128) // definition levels bs->page.chunk_row = 0; bs->page.num_rows = 0; + bs->page.str_bytes = 0; } num_values = bs->ck.num_values; page_info = bs->ck.page_info; diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index a5e2b3d8be4..62bfee1f1d8 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -105,10 +105,10 @@ struct PageNestingInfo { // set at initialization int32_t max_def_level; - int32_t max_rep_level; + int32_t max_rep_level; // set during preprocessing - int32_t size; // this page/nesting-level's row count contribution to the output column + int32_t size; // this page/nesting-level's row count contribution to the output column int32_t page_start_value; // absolute output start index in output column data // set during data decoding @@ -144,7 +144,7 @@ struct PageInfo { Encoding encoding; // Encoding for data or dictionary page Encoding definition_level_encoding; // Encoding used for definition levels (data page) Encoding repetition_level_encoding; // Encoding used for repetition levels (data page) - cudf::type_id type; // type of this page. + cudf::type_id type; // type of this page. // for nested types, we run a preprocess step in order to determine output // column sizes. 
Because of this, we can jump directly to the position in the @@ -158,7 +158,7 @@ struct PageInfo { int skipped_values; // # of values skipped in the actual data stream. int skipped_leaf_values; - int32_t str_bytes; // for string columns only, the size in bytes + int32_t str_bytes; // for string columns only, the size in for all the chars in the string // nesting information (input/output) for each page int num_nesting_levels; @@ -247,8 +247,11 @@ struct ColumnChunkDesc { struct chunked_intermediate_data { rmm::device_uvector page_keys; rmm::device_uvector page_index; + rmm::device_uvector str_dict_index; chunked_intermediate_data() - : page_keys(0, rmm::cuda_stream_default), page_index(0, rmm::cuda_stream_default) + : page_keys(0, rmm::cuda_stream_default), + page_index(0, rmm::cuda_stream_default), + str_dict_index(0, rmm::cuda_stream_default) { } }; diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index be67775164d..ed21a6788c1 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1260,7 +1260,7 @@ void reader::impl::allocate_nesting_info(hostdevice_vector auto& schema = _metadata->get_schema(src_col_schema); auto const per_page_nesting_info_size = std::max( schema.max_definition_level + 1, _metadata->get_output_nesting_depth(src_col_schema)); - auto const type_id = to_type_id(schema, _strings_to_categorical, _timestamp_type.id()); + auto const type_id = to_type_id(schema, _strings_to_categorical, _timestamp_type.id()); // skip my dict pages target_page_index += chunks[idx].num_dict_pages; @@ -1269,8 +1269,8 @@ void reader::impl::allocate_nesting_info(hostdevice_vector pages[target_page_index + p_idx].num_nesting_levels = per_page_nesting_info_size; // this isn't the ideal place to be setting this value (it's not obvious this function would - // do it) but we don't have any other places that go host->device with the pages and I'd like to - // avoid another copy + // do it) but we don't have any other places that go host->device with the pages and I'd like + // to avoid another copy pages[target_page_index + p_idx].type = type_id; src_info_index += per_page_nesting_info_size; @@ -1361,23 +1361,6 @@ void reader::impl::decode_page_data(hostdevice_vector& chu size_t min_row, size_t total_rows) { - auto is_dict_chunk = [](const gpu::ColumnChunkDesc& chunk) { - return (chunk.data_type & 0x7) == BYTE_ARRAY && chunk.num_dict_pages > 0; - }; - - // Count the number of string dictionary entries - // NOTE: Assumes first page in the chunk is always the dictionary page - size_t total_str_dict_indexes = 0; - for (size_t c = 0, page_count = 0; c < chunks.size(); c++) { - if (is_dict_chunk(chunks[c])) { total_str_dict_indexes += pages[page_count].num_input_values; } - page_count += chunks[c].max_num_pages; - } - - // Build index for string dictionaries since they can't be indexed - // directly due to variable-sized elements - auto str_dict_index = cudf::detail::make_zeroed_device_uvector_async( - total_str_dict_indexes, _stream); - // TODO (dm): hd_vec should have begin and end iterator members size_t sum_max_depths = std::accumulate(chunks.host_ptr(), @@ -1395,16 +1378,11 @@ void reader::impl::decode_page_data(hostdevice_vector& chu auto chunk_offsets = std::vector(); // Update chunks with pointers to column data. 
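  // Note: the string-dictionary index setup removed from the loop below (the str_ofs /
  // str_dict_index bookkeeping) is not gone; it moves into the preprocessing pass in
  // reader_preprocess.cu, since the page-size kernel now measures string lengths and
  // therefore needs the dictionary index built before decode time.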
- for (size_t c = 0, page_count = 0, str_ofs = 0, chunk_off = 0; c < chunks.size(); c++) { + for (size_t c = 0, page_count = 0, chunk_off = 0; c < chunks.size(); c++) { input_column_info const& input_col = _input_columns[chunks[c].src_col_index]; CUDF_EXPECTS(input_col.schema_idx == chunks[c].src_col_schema, "Column/page schema index mismatch"); - if (is_dict_chunk(chunks[c])) { - chunks[c].str_dict_index = str_dict_index.data() + str_ofs; - str_ofs += pages[page_count].num_input_values; - } - size_t max_depth = _metadata->get_output_nesting_depth(chunks[c].src_col_schema); chunk_offsets.push_back(chunk_off); @@ -1476,10 +1454,6 @@ void reader::impl::decode_page_data(hostdevice_vector& chu chunk_nested_valids.host_to_device(_stream); chunk_nested_data.host_to_device(_stream); - if (total_str_dict_indexes > 0) { - gpu::BuildStringDictionaryIndex(chunks.device_ptr(), chunks.size(), _stream); - } - gpu::DecodePageData(pages, chunks, total_rows, min_row, _stream); pages.device_to_host(_stream); page_nesting.device_to_host(_stream); @@ -1716,7 +1690,8 @@ table_with_metadata reader::impl::read(size_type skip_rows, // - for nested schemas, output buffer offset values per-page, per nesting-level for the // purposes of decoding. // TODO: make this a parameter. - //auto const chunked_read_size = 240000; + // auto const chunked_read_size = 240000; + // auto const chunked_read_size = 1000000; auto const chunked_read_size = 0; auto chunk_reads = preprocess_columns( chunks, pages, skip_rows, num_rows, uses_custom_row_bounds, chunked_read_size); @@ -1796,4 +1771,4 @@ table_with_metadata reader::read(parquet_reader_options const& options) } // namespace parquet } // namespace detail } // namespace io -} // namespace cudf +} // namespace cudf \ No newline at end of file diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index 91abdb7d3b8..04827c16994 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -180,7 +180,6 @@ class reader::impl { size_t total_rows, bool uses_custom_row_bounds); - /** * @brief Converts the page data and outputs to columns. * diff --git a/cpp/src/io/parquet/reader_preprocess.cu b/cpp/src/io/parquet/reader_preprocess.cu index 4fc81acb510..959ebe66e18 100644 --- a/cpp/src/io/parquet/reader_preprocess.cu +++ b/cpp/src/io/parquet/reader_preprocess.cu @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -79,8 +80,11 @@ __device__ size_t row_size_functor::operator()(size_t num_rows, boo template <> __device__ size_t row_size_functor::operator()(size_t num_rows, bool nullable) -{ - return 0 + validity_size(num_rows, nullable); +{ + // only returns the size of offsets and validity. the size of the actual string chars + // is tracked seperately. 
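  // (Illustration with hypothetical numbers: a page contributing 1000 nullable string
  //  rows is costed here as (1000 + 1) * sizeof(offset_type) bytes of offsets plus the
  //  validity-bitmask bytes; the char bytes arrive separately via PageInfo::str_bytes
  //  when the cumulative row sizes are assembled below.)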
+ auto const offset_size = sizeof(offset_type); + return (offset_size * (num_rows + 1)) + validity_size(num_rows, nullable); } struct get_cumulative_row_info { @@ -95,7 +99,10 @@ struct get_cumulative_row_info { size_t const row_count = page.nesting[0].size; return cumulative_row_info{ row_count, - cudf::type_dispatcher(data_type{page.type}, row_size_functor{}, row_count, false), + // note: the size of the actual char bytes for strings is tracked in the `str_bytes` field, so + // the row_size_functor{} itself is only returning the size of offsets+validity + cudf::type_dispatcher(data_type{page.type}, row_size_functor{}, row_count, false) + + page.str_bytes, page.src_col_schema}; } }; @@ -144,8 +151,9 @@ std::vector compute_splits(hostdevice_vector h_page_index(pages.size()); @@ -158,7 +166,7 @@ std::vector compute_splits(hostdevice_vector h_c_info(page_keys.size()); @@ -166,6 +174,7 @@ std::vector compute_splits(hostdevice_vector compute_splits(hostdevice_vector compute_splits(hostdevice_vector compute_splits(hostdevice_vector compute_splits(hostdevice_vector& chunks if (has_lists) { break; } } - // intermediate data we will need for further chunked reads gpu::chunked_intermediate_data id; + + // generate string dict indices if necessary + { + auto is_dict_chunk = [](const gpu::ColumnChunkDesc& chunk) { + return (chunk.data_type & 0x7) == BYTE_ARRAY && chunk.num_dict_pages > 0; + }; + + // Count the number of string dictionary entries + // NOTE: Assumes first page in the chunk is always the dictionary page + size_t total_str_dict_indexes = 0; + for (size_t c = 0, page_count = 0; c < chunks.size(); c++) { + if (is_dict_chunk(chunks[c])) { + total_str_dict_indexes += pages[page_count].num_input_values; + } + page_count += chunks[c].max_num_pages; + } + + // Build index for string dictionaries since they can't be indexed + // directly due to variable-sized elements + id.str_dict_index = cudf::detail::make_zeroed_device_uvector_async( + total_str_dict_indexes, _stream); + + // Update chunks with pointers to string dict indices + for (size_t c = 0, page_count = 0, str_ofs = 0; c < chunks.size(); c++) { + input_column_info const& input_col = _input_columns[chunks[c].src_col_index]; + CUDF_EXPECTS(input_col.schema_idx == chunks[c].src_col_schema, + "Column/page schema index mismatch"); + if (is_dict_chunk(chunks[c])) { + chunks[c].str_dict_index = id.str_dict_index.data() + str_ofs; + str_ofs += pages[page_count].num_input_values; + } + + // column_data_base will always point to leaf data, even for nested types. + page_count += chunks[c].max_num_pages; + } + + if (total_str_dict_indexes > 0) { + chunks.host_to_device(_stream); + gpu::BuildStringDictionaryIndex(chunks.device_ptr(), chunks.size(), _stream); + } + } + + // intermediate data we will need for further chunked reads if (has_lists || chunked_read_size > 0) { // computes: // PageNestingInfo::size for each level of nesting, for each page. 
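This is the machinery the byte-limited splitting rests on: each page contributes a row count and an estimated output size (offsets and validity from row_size_functor, chars from str_bytes), those contributions are accumulated into running per-row-boundary totals, and compute_splits then picks row boundaries that keep every chunk under the limit. A minimal host-side sketch of that last step, with illustrative names only (the real code works on cumulative_row_info records on the device and produces gpu::chunk_read_info entries):

    #include <cstddef>
    #include <vector>

    // running totals at each page boundary, in row order
    struct cumulative { std::size_t row_count; std::size_t size_bytes; };
    // a {skip_rows, num_rows} range handed to one read_chunk() call
    struct row_chunk  { std::size_t skip_rows; std::size_t num_rows; };

    // Greedy split: close the current chunk at the last page boundary that still fits
    // under the byte limit, then start the next chunk there.
    std::vector<row_chunk> split_by_size(std::vector<cumulative> const& totals,
                                         std::size_t byte_limit)
    {
      std::vector<row_chunk> chunks;
      cumulative chunk_start{0, 0};  // totals at the start of the current chunk
      cumulative last_fit{0, 0};     // last boundary that still fit under the limit
      for (auto const& t : totals) {
        if (t.size_bytes - chunk_start.size_bytes > byte_limit &&
            last_fit.row_count > chunk_start.row_count) {
          chunks.push_back({chunk_start.row_count, last_fit.row_count - chunk_start.row_count});
          chunk_start = last_fit;
        }
        last_fit = t;
      }
      if (last_fit.row_count > chunk_start.row_count) {
        chunks.push_back({chunk_start.row_count, last_fit.row_count - chunk_start.row_count});
      }
      return chunks;
    }

With boundary totals of 180004, 420008 and 900012 bytes over 60000 rows and a 500000-byte limit, this yields {0, 40000} and {40000, 20000}, which matches the expectation worked out in the commented-out string example later in this patch.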
@@ -574,20 +625,20 @@ void reader::impl::allocate_columns(hostdevice_vector& chu // allocate out_buf.create(size, _stream, _mr); - } + } // for nested hierarchies, compute per-page start offset if (input_col.has_repetition) { - thrust::exclusive_scan_by_key( - rmm::exec_policy(_stream), - page_keys.begin(), - page_keys.end(), - size_input, - start_offset_output_iterator{pages.device_ptr(), - page_index.begin(), - 0, - static_cast(src_col_schema), - static_cast(l_idx)}); + thrust::exclusive_scan_by_key( + rmm::exec_policy(_stream), + page_keys.begin(), + page_keys.end(), + size_input, + start_offset_output_iterator{pages.device_ptr(), + page_index.begin(), + 0, + static_cast(src_col_schema), + static_cast(l_idx)}); } } } @@ -617,6 +668,60 @@ cudf::io::parquet_reader_options::builder(cudf::io::source_info{"parquet/tmp/chu } */ +/* +{ + // values the cudf parquet writer uses + // constexpr size_t default_max_page_size_bytes = 512 * 1024; ///< 512KB per page + // constexpr size_type default_max_page_size_rows = 20000; ///< 20k rows per page + + std::mt19937 gen(6542); + std::bernoulli_distribution bn(0.7f); + auto values = thrust::make_counting_iterator(0); + + constexpr size_type num_rows = 60000; + + // ints Page total bytes cumulative bytes + // 20000 rows of 4 bytes each = A0 80000 80000 + // 20000 rows of 4 bytes each = A1 80000 160000 + // 20000 rows of 4 bytes each = A2 80000 240000 + cudf::test::fixed_width_column_wrapper a(values, values + num_rows); + + // strings Page total bytes cumulative bytes + // 20000 rows of 1 char each (20000 + 80004) = B0 100004 100004 + // 20000 rows of 4 chars each (80000 + 80004) = B1 160004 260008 + // 20000 rows of 16 chars each (320000 + 80004) = B2 400004 660012 + std::vector strings { "a", "bbbb", "cccccccccccccccc" }; + auto const str_iter = cudf::detail::make_counting_transform_iterator(0, [&](int i){ + if(i < 20000){ + return strings[0]; + } + if(i < 40000){ + return strings[1]; + } + return strings[2]; + }); + cudf::test::strings_column_wrapper b{str_iter, str_iter + num_rows}; + + // cumulative sizes + // A0 + B0 : 180004 + // A1 + B1 : 420008 + // A2 + B2 : 900012 + // skip_rows / num_rows + // chunked_read_size of 500000 should give 2 chunks: {0, 40000}, {40000, 20000} + // chunked_read_size of 1000000 should give 1 chunks: {0, 60000}, + + auto write_tbl = table_view{{a, b}}; + auto filepath = std::string{"parquet/tmp/chunked_splits_strings.parquet"}; + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, write_tbl); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(in_opts); + } + */ + } // namespace parquet } // namespace detail } // namespace io From fbeabfc13108a177c5bfa0098ba1b3e0c8b20c13 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 18 Oct 2022 14:07:00 -0700 Subject: [PATCH 050/162] Fix bug Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.cu | 31 +++++++++++++++++++++++++++--- cpp/src/io/parquet/reader_impl.hpp | 2 ++ 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index fc949b5b013..4fdc00f3a74 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1500,7 +1500,21 @@ void reader::impl::decode_page_data(hostdevice_vector& chu printf("read total_rows = %d, min_row = %d\n", 
(int)total_rows, (int)min_row); fflush(stdout); + printf("pages size= %d, chunk size = %d, pages = %zu\n", + (int)pages.size(), + (int)chunks.size(), + (size_t)pages.device_ptr()); + fflush(stdout); + gpu::DecodePageData(pages, chunks, total_rows, min_row, _stream); + + printf("line %d\n", __LINE__); + fflush(stdout); + + _stream.synchronize(); + printf("line %d\n", __LINE__); + fflush(stdout); + pages.device_to_host(_stream); page_nesting.device_to_host(_stream); _stream.synchronize(); @@ -1580,7 +1594,7 @@ reader::impl::impl(std::vector>&& sources, parquet_reader_options const& options, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) - : _stream(stream), _mr(mr), _sources(std::move(sources)) + : _stream(stream), _mr(mr), _sources(std::move(sources)), _options(options) { // Open and parse the source dataset metadata _metadata = std::make_unique(_sources); @@ -1873,11 +1887,22 @@ table_with_metadata reader::impl::read(size_type skip_rows, table_with_metadata reader::impl::read_chunk() { + { + // TODO: this be called once, then _output_columns is saved as a template and copied to the + // output each time. + std::tie(_input_columns, _output_columns, _output_column_schemas) = + _metadata->select_columns(_options.get_columns(), + _options.is_enabled_use_pandas_metadata(), + _strings_to_categorical, + _timestamp_type.id()); + } + printf("line %d\n", __LINE__); fflush(stdout); - if (!_file_preprocessed) { - printf("line %d\n", __LINE__); + // if (!_file_preprocessed) { + if (true) { + printf("preprocessing from the beginning ===================line %d\n", __LINE__); fflush(stdout); [[maybe_unused]] auto [skip_rows_corrected, num_rows_corrected] = preprocess_file(0, -1, {}); diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index f26a407b7c5..140f6a854c9 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -273,6 +273,8 @@ class reader::impl { std::vector _chunk_read_info; std::size_t _current_read_chunk{0}; bool _file_preprocessed{false}; + + parquet_reader_options const& _options; }; } // namespace parquet From df074e03b5cb4743d52e91b858643ebbc315a441 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 18 Oct 2022 14:07:39 -0700 Subject: [PATCH 051/162] Remove debug print Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.cu | 83 ------------------------------- cpp/tests/io/parquet_test.cpp | 11 +--- 2 files changed, 1 insertion(+), 93 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 4fdc00f3a74..fce9234952b 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1357,16 +1357,10 @@ void reader::impl::decode_page_data(hostdevice_vector& chu size_t min_row, size_t total_rows) { - printf("line %d\n", __LINE__); - fflush(stdout); - auto is_dict_chunk = [](const gpu::ColumnChunkDesc& chunk) { return (chunk.data_type & 0x7) == BYTE_ARRAY && chunk.num_dict_pages > 0; }; - printf("line %d\n", __LINE__); - fflush(stdout); - // Count the number of string dictionary entries // NOTE: Assumes first page in the chunk is always the dictionary page size_t total_str_dict_indexes = 0; @@ -1380,9 +1374,6 @@ void reader::impl::decode_page_data(hostdevice_vector& chu auto str_dict_index = cudf::detail::make_zeroed_device_uvector_async( total_str_dict_indexes, _stream); - printf("line %d\n", __LINE__); - fflush(stdout); - // TODO (dm): hd_vec should have begin and end iterator members size_t sum_max_depths = 
std::accumulate(chunks.host_ptr(), @@ -1399,9 +1390,6 @@ void reader::impl::decode_page_data(hostdevice_vector& chu auto chunk_nested_data = hostdevice_vector(sum_max_depths, _stream); auto chunk_offsets = std::vector(); - printf("line %d\n", __LINE__); - fflush(stdout); - // Update chunks with pointers to column data. for (size_t c = 0, page_count = 0, str_ofs = 0, chunk_off = 0; c < chunks.size(); c++) { input_column_info const& input_col = _input_columns[chunks[c].src_col_index]; @@ -1480,48 +1468,29 @@ void reader::impl::decode_page_data(hostdevice_vector& chu page_count += chunks[c].max_num_pages; } - printf("line %d\n", __LINE__); - fflush(stdout); - chunks.host_to_device(_stream); chunk_nested_valids.host_to_device(_stream); chunk_nested_data.host_to_device(_stream); - printf("line %d\n", __LINE__); - fflush(stdout); - if (total_str_dict_indexes > 0) { gpu::BuildStringDictionaryIndex(chunks.device_ptr(), chunks.size(), _stream); } - printf("line %d\n", __LINE__); - fflush(stdout); - printf("read total_rows = %d, min_row = %d\n", (int)total_rows, (int)min_row); - fflush(stdout); printf("pages size= %d, chunk size = %d, pages = %zu\n", (int)pages.size(), (int)chunks.size(), (size_t)pages.device_ptr()); - fflush(stdout); gpu::DecodePageData(pages, chunks, total_rows, min_row, _stream); - printf("line %d\n", __LINE__); - fflush(stdout); - _stream.synchronize(); - printf("line %d\n", __LINE__); - fflush(stdout); pages.device_to_host(_stream); page_nesting.device_to_host(_stream); _stream.synchronize(); - printf("line %d\n", __LINE__); - fflush(stdout); - // for list columns, add the final offset to every offset buffer. // TODO : make this happen in more efficiently. Maybe use thrust::for_each // on each buffer. Or potentially do it in PreprocessColumnData @@ -1555,9 +1524,6 @@ void reader::impl::decode_page_data(hostdevice_vector& chu } } - printf("line %d\n", __LINE__); - fflush(stdout); - // update null counts in the final column buffers for (size_t idx = 0; idx < pages.size(); idx++) { gpu::PageInfo* pi = &pages[idx]; @@ -1581,13 +1547,7 @@ void reader::impl::decode_page_data(hostdevice_vector& chu } } - printf("line %d\n", __LINE__); - fflush(stdout); - _stream.synchronize(); - - printf("line %d\n", __LINE__); - fflush(stdout); } reader::impl::impl(std::vector>&& sources, @@ -1781,9 +1741,6 @@ table_with_metadata reader::impl::read_chunk_internal(bool uses_custom_row_bound std::vector> out_columns; out_columns.reserve(_output_columns.size()); - printf("line %d\n", __LINE__); - fflush(stdout); - // allocate outgoing columns allocate_columns(_file_itm_data.chunks, _file_itm_data.pages_info, @@ -1792,9 +1749,6 @@ table_with_metadata reader::impl::read_chunk_internal(bool uses_custom_row_bound read_info.num_rows, uses_custom_row_bounds); - printf("line %d\n", __LINE__); - fflush(stdout); - printf("read skip_rows = %d, num_rows = %d\n", (int)read_info.skip_rows, (int)read_info.num_rows); // decoding column data @@ -1804,9 +1758,6 @@ table_with_metadata reader::impl::read_chunk_internal(bool uses_custom_row_bound read_info.skip_rows, read_info.num_rows); - printf("line %d\n", __LINE__); - fflush(stdout); - // create the final output cudf columns for (size_t i = 0; i < _output_columns.size(); ++i) { column_name_info& col_name = out_metadata.schema_info.emplace_back(""); @@ -1816,18 +1767,12 @@ table_with_metadata reader::impl::read_chunk_internal(bool uses_custom_row_bound out_columns.emplace_back(make_column(_output_columns[i], &col_name, metadata, _stream, _mr)); } - printf("line %d\n", 
__LINE__); - fflush(stdout); - return finalize_output(out_metadata, out_columns); } table_with_metadata reader::impl::finalize_output(table_metadata& out_metadata, std::vector>& out_columns) { - printf("line %d\n", __LINE__); - fflush(stdout); - // Create empty columns as needed (this can happen if we've ended up with no actual data to read) for (size_t i = out_columns.size(); i < _output_columns.size(); ++i) { column_name_info& col_name = out_metadata.schema_info.emplace_back(""); @@ -1846,9 +1791,6 @@ table_with_metadata reader::impl::finalize_output(table_metadata& out_metadata, out_metadata.user_data = {out_metadata.per_file_user_data[0].begin(), out_metadata.per_file_user_data[0].end()}; - printf("line %d\n", __LINE__); - fflush(stdout); - return {std::make_unique
(std::move(out_columns)), std::move(out_metadata)}; } @@ -1897,13 +1839,9 @@ table_with_metadata reader::impl::read_chunk() _timestamp_type.id()); } - printf("line %d\n", __LINE__); - fflush(stdout); - // if (!_file_preprocessed) { if (true) { printf("preprocessing from the beginning ===================line %d\n", __LINE__); - fflush(stdout); [[maybe_unused]] auto [skip_rows_corrected, num_rows_corrected] = preprocess_file(0, -1, {}); @@ -1929,34 +1867,20 @@ table_with_metadata reader::impl::read_chunk() chunked_read_size); } _file_preprocessed = true; - - printf("line %d\n", __LINE__); - fflush(stdout); } - printf("line %d\n", __LINE__); - fflush(stdout); return read_chunk_internal(true); } bool reader::impl::has_next() { printf("prepr: %d\n", (int)_file_preprocessed); - printf("line %d\n", __LINE__); - fflush(stdout); if (!_file_preprocessed) { - printf("line %d\n", __LINE__); - fflush(stdout); [[maybe_unused]] auto [skip_rows_corrected, num_rows_corrected] = preprocess_file(0, -1, {}); - printf("line %d\n", __LINE__); - fflush(stdout); // todo: fix this (empty output may be incorrect) if (_file_itm_data.has_data) { - printf("line %d\n", __LINE__); - fflush(stdout); - // - compute column sizes and allocate output buffers. // important: // for nested schemas, we have to do some further preprocessing to determine: @@ -1975,17 +1899,10 @@ bool reader::impl::has_next() num_rows_corrected, true /*uses_custom_row_bounds*/, chunked_read_size); - - printf("line %d\n", __LINE__); - fflush(stdout); } _file_preprocessed = true; - printf("line %d\n", __LINE__); - fflush(stdout); } - printf("line %d\n", __LINE__); - fflush(stdout); return _current_read_chunk < _chunk_read_info.size(); } diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index ea4ad22c833..411a96ff8d0 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -77,36 +77,27 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedRead) std::bernoulli_distribution bn(0.7f); auto values = thrust::make_counting_iterator(0); - printf("test, line %d\n\n", __LINE__); constexpr cudf::size_type num_rows = 40000; cudf::test::fixed_width_column_wrapper a(values, values + num_rows); cudf::test::fixed_width_column_wrapper b(values, values + num_rows); - printf("test, line %d\n\n", __LINE__); - cudf::table_view t({a, b}); cudf::io::parquet_writer_options opts = cudf::io::parquet_writer_options::builder( cudf::io::sink_info{"/tmp/chunked_splits.parquet"}, t); cudf::io::write_parquet(opts); - printf("test, line %d\n\n", __LINE__); - cudf::io::chunked_parquet_reader_options in_opts = cudf::io::chunked_parquet_reader_options::builder( cudf::io::source_info{"/tmp/chunked_splits.parquet"}); - printf("test, line %d\n\n", __LINE__); - fflush(stdout); - cudf::io::chunked_parquet_reader reader(in_opts); int count{0}; while (reader.has_next()) { printf("\n\nhas next %d\n\n", count++); - fflush(stdout); + auto result = reader.read_chunk(); printf("Result size: %d\n\n\n\n\n", result.tbl->num_rows()); - fflush(stdout); } } #endif From 631eff12652832bce751ed3c546fbaf09666ae00 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 18 Oct 2022 15:43:41 -0700 Subject: [PATCH 052/162] Fix tests Signed-off-by: Nghia Truong --- cpp/tests/io/parquet_test.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index c72c17216f3..6e3a1b91329 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -81,7 +81,7 @@ 
TEST_F(ParquetChunkedReaderTest, TestChunkedRead) cudf::test::fixed_width_column_wrapper a(values, values + num_rows); cudf::test::fixed_width_column_wrapper b(values, values + num_rows); - auto filepath = std::string{"/tmp/chunked_splits_strings.parquet"}; + auto filepath = std::string{"/tmp/chunked_splits.parquet"}; cudf::table_view t({a, b}); cudf::io::parquet_writer_options opts = cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, t); @@ -96,8 +96,7 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedRead) } cudf::io::chunked_parquet_reader_options in_opts = - cudf::io::chunked_parquet_reader_options::builder( - cudf::io::source_info{"/tmp/chunked_splits.parquet"}); + cudf::io::chunked_parquet_reader_options::builder(cudf::io::source_info{filepath}); cudf::io::chunked_parquet_reader reader(in_opts); @@ -157,8 +156,7 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadString) } cudf::io::chunked_parquet_reader_options in_opts = - cudf::io::chunked_parquet_reader_options::builder( - cudf::io::source_info{"/tmp/chunked_splits.parquet"}); + cudf::io::chunked_parquet_reader_options::builder(cudf::io::source_info{filepath}); cudf::io::chunked_parquet_reader reader(in_opts); From d1b4e4cef7f221e2775e331f117c7121595dac9f Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 18 Oct 2022 16:06:07 -0700 Subject: [PATCH 053/162] Fix chunk size limit Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.cu | 6 +++--- cpp/src/io/parquet/reader_impl.hpp | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 64b95c8a428..b9ccf29e11d 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1786,7 +1786,7 @@ table_with_metadata reader::impl::read(size_type skip_rows, // - for nested schemas, output buffer offset values per-page, per nesting-level for the // purposes of decoding. // TODO: make this a parameter. - auto const chunked_read_size = 240000; + // auto const chunked_read_size = 0; preprocess_columns(_file_itm_data.chunks, _file_itm_data.pages_info, @@ -1828,7 +1828,7 @@ table_with_metadata reader::impl::read_chunk() // - for nested schemas, output buffer offset values per-page, per nesting-level for the // purposes of decoding. // TODO: make this a parameter. - auto const chunked_read_size = 240000; + // auto const chunked_read_size = 0; preprocess_columns(_file_itm_data.chunks, _file_itm_data.pages_info, @@ -1862,7 +1862,7 @@ bool reader::impl::has_next() // - for nested schemas, output buffer offset values per-page, per nesting-level for the // purposes of decoding. // TODO: make this a parameter. 
- auto const chunked_read_size = 240000; + // auto const chunked_read_size = 0; preprocess_columns(_file_itm_data.chunks, _file_itm_data.pages_info, diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index 140f6a854c9..536f6feffb4 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -274,6 +274,8 @@ class reader::impl { std::size_t _current_read_chunk{0}; bool _file_preprocessed{false}; + // TODO: Remove below + std::size_t chunked_read_size = 490000; parquet_reader_options const& _options; }; From 3f2f8a4d93cf97f2230688b4d16baea1d7da336c Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 18 Oct 2022 16:13:23 -0700 Subject: [PATCH 054/162] Turn back to do preprocess once Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.cu | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index b9ccf29e11d..0a07d83640c 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1810,10 +1810,7 @@ table_with_metadata reader::impl::read_chunk() _timestamp_type.id()); } - // if (!_file_preprocessed) { - if (true) { - printf("preprocessing from the beginning ===================line %d\n", __LINE__); - + if (!_file_preprocessed) { [[maybe_unused]] auto [skip_rows_corrected, num_rows_corrected] = preprocess_file(0, -1, {}); // todo: fix this (empty output may be incorrect) From 974e7eff7fe1203e2fac61cd6c5e6ec4d3207f67 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 19 Oct 2022 09:28:06 -0700 Subject: [PATCH 055/162] The read limit parameter is now no longer const but truely runtime parameter Signed-off-by: Nghia Truong --- cpp/include/cudf/io/parquet.hpp | 5 +++++ cpp/src/io/parquet/reader_impl.cu | 20 ++++++++++++-------- cpp/src/io/parquet/reader_impl.hpp | 2 +- cpp/tests/io/parquet_test.cpp | 2 ++ 4 files changed, 20 insertions(+), 9 deletions(-) diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index 337749a5ebe..7caa1d65d1f 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -89,6 +89,11 @@ class parquet_reader_options { */ explicit parquet_reader_options() = default; + /** + * @brief Default destructor, needs to be virtual for polymorphism. + */ + virtual ~parquet_reader_options() = default; + /** * @brief Creates a parquet_reader_options_builder which will build parquet_reader_options. * diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 0a07d83640c..ea1e53bb879 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1547,6 +1547,13 @@ reader::impl::impl(std::vector>&& sources, options.is_enabled_use_pandas_metadata(), _strings_to_categorical, _timestamp_type.id()); + + // If the options passed in is an instance of `chunked_parquet_reader_options`, extract the + // `byte_limit` parameter. + if (auto const chunked_options = dynamic_cast(&options); + chunked_options) { + _chunk_read_limit = chunked_options->get_byte_limit(); + } } std::pair reader::impl::preprocess_file( @@ -1787,13 +1794,13 @@ table_with_metadata reader::impl::read(size_type skip_rows, // purposes of decoding. // TODO: make this a parameter. 
- // auto const chunked_read_size = 0; + // auto const _chunk_read_limit = 0; preprocess_columns(_file_itm_data.chunks, _file_itm_data.pages_info, skip_rows_corrected, num_rows_corrected, uses_custom_row_bounds, - chunked_read_size); + _chunk_read_limit); return read_chunk_internal(uses_custom_row_bounds); } @@ -1824,15 +1831,12 @@ table_with_metadata reader::impl::read_chunk() // // - for nested schemas, output buffer offset values per-page, per nesting-level for the // purposes of decoding. - // TODO: make this a parameter. - - // auto const chunked_read_size = 0; preprocess_columns(_file_itm_data.chunks, _file_itm_data.pages_info, skip_rows_corrected, num_rows_corrected, true /*uses_custom_row_bounds*/, - chunked_read_size); + _chunk_read_limit); } _file_preprocessed = true; } @@ -1860,13 +1864,13 @@ bool reader::impl::has_next() // purposes of decoding. // TODO: make this a parameter. - // auto const chunked_read_size = 0; + // auto const _chunk_read_limit = 0; preprocess_columns(_file_itm_data.chunks, _file_itm_data.pages_info, skip_rows_corrected, num_rows_corrected, true /*uses_custom_row_bounds*/, - chunked_read_size); + _chunk_read_limit); } _file_preprocessed = true; } diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index 536f6feffb4..a3b7f7594e4 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -271,11 +271,11 @@ class reader::impl { cudf::io::parquet::gpu::file_intermediate_data _file_itm_data; cudf::io::parquet::gpu::chunk_intermediate_data _chunk_itm_data; std::vector _chunk_read_info; + std::size_t _chunk_read_limit{0}; std::size_t _current_read_chunk{0}; bool _file_preprocessed{false}; // TODO: Remove below - std::size_t chunked_read_size = 490000; parquet_reader_options const& _options; }; diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index 6e3a1b91329..7052bc1cbc9 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -97,6 +97,7 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedRead) cudf::io::chunked_parquet_reader_options in_opts = cudf::io::chunked_parquet_reader_options::builder(cudf::io::source_info{filepath}); + in_opts.set_byte_limit(240000); cudf::io::chunked_parquet_reader reader(in_opts); @@ -157,6 +158,7 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadString) cudf::io::chunked_parquet_reader_options in_opts = cudf::io::chunked_parquet_reader_options::builder(cudf::io::source_info{filepath}); + in_opts.set_byte_limit(500000); cudf::io::chunked_parquet_reader reader(in_opts); From 0be096bcc22fea16f4f0059baa227605d1e51f11 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 19 Oct 2022 09:32:00 -0700 Subject: [PATCH 056/162] Add new test file Signed-off-by: Nghia Truong --- cpp/tests/CMakeLists.txt | 2 +- cpp/tests/io/parquet_chunked_reader_test.cpp | 173 +++++++++++++++++++ 2 files changed, 174 insertions(+), 1 deletion(-) create mode 100644 cpp/tests/io/parquet_chunked_reader_test.cpp diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 8675dc891c1..70eaf055bfe 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -223,7 +223,7 @@ ConfigureTest(DECOMPRESSION_TEST io/comp/decomp_test.cpp) ConfigureTest(CSV_TEST io/csv_test.cpp) ConfigureTest(FILE_IO_TEST io/file_io_test.cpp) ConfigureTest(ORC_TEST io/orc_test.cpp) -ConfigureTest(PARQUET_TEST io/parquet_test.cpp) +ConfigureTest(PARQUET_TEST io/parquet_test.cpp io/parquet_chunked_reader_test.cpp) ConfigureTest(JSON_TEST io/json_test.cpp) 
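One detail from the previous commit is worth spelling out: `dynamic_cast` can only downcast through a polymorphic base, which is why `parquet_reader_options` gained a virtual destructor before the reader constructor started probing for the chunked options type. A standalone sketch of the same detection pattern, with made-up class names rather than the real cudf headers:

#include <cstddef>
#include <iostream>

struct reader_options {
  virtual ~reader_options() = default;  // a virtual member makes the type polymorphic
};

struct chunked_reader_options : reader_options {
  std::size_t byte_limit = 0;
};

// Mirrors the constructor logic in the diff: accept the base type, and only pick up a
// chunk budget when the caller actually passed the chunked variant.
std::size_t extract_chunk_limit(reader_options const& options)
{
  if (auto const chunked = dynamic_cast<chunked_reader_options const*>(&options); chunked) {
    return chunked->byte_limit;
  }
  return 0;  // plain reads get no per-chunk budget
}

int main()
{
  chunked_reader_options chunked;
  chunked.byte_limit = 240'000;
  reader_options plain;
  std::cout << extract_chunk_limit(chunked) << "\n";  // 240000
  std::cout << extract_chunk_limit(plain) << "\n";    // 0
  return 0;
}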
ConfigureTest(JSON_TYPE_CAST_TEST io/json_type_cast_test.cu) ConfigureTest(NESTED_JSON_TEST io/nested_json_test.cpp io/json_tree.cpp) diff --git a/cpp/tests/io/parquet_chunked_reader_test.cpp b/cpp/tests/io/parquet_chunked_reader_test.cpp new file mode 100644 index 00000000000..7052bc1cbc9 --- /dev/null +++ b/cpp/tests/io/parquet_chunked_reader_test.cpp @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include + +#include +#include + +struct ParquetChunkedReaderTest : public cudf::test::BaseFixture { +}; + +#if 0 +TEST_F(ParquetChunkedReaderTest, Test) +{ + std::mt19937 gen(6542); + std::bernoulli_distribution bn(0.7f); + auto values = thrust::make_counting_iterator(0); + + constexpr cudf::size_type num_rows = 40000; + cudf::test::fixed_width_column_wrapper a(values, values + num_rows); + cudf::test::fixed_width_column_wrapper b(values, values + num_rows); + + cudf::table_view t({a, b}); + cudf::io::parquet_writer_options opts = cudf::io::parquet_writer_options::builder( + cudf::io::sink_info{"/tmp/chunked_splits.parquet"}, t); + cudf::io::write_parquet(opts); + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{"/tmp/chunked_splits.parquet"}); + auto result = cudf::io::read_parquet(in_opts); + printf("\nResult size read all: %d\n\n", result.tbl->num_rows()); +} + +#else +TEST_F(ParquetChunkedReaderTest, TestChunkedRead) +{ + std::mt19937 gen(6542); + std::bernoulli_distribution bn(0.7f); + auto values = thrust::make_counting_iterator(0); + + constexpr cudf::size_type num_rows = 40000; + cudf::test::fixed_width_column_wrapper a(values, values + num_rows); + cudf::test::fixed_width_column_wrapper b(values, values + num_rows); + + auto filepath = std::string{"/tmp/chunked_splits.parquet"}; + cudf::table_view t({a, b}); + cudf::io::parquet_writer_options opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, t); + cudf::io::write_parquet(opts); + + //======================================================================================== + { + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(in_opts); + printf("Result size read full: %d\n\n\n\n\n", result.tbl->num_rows()); + } + + cudf::io::chunked_parquet_reader_options in_opts = + cudf::io::chunked_parquet_reader_options::builder(cudf::io::source_info{filepath}); + in_opts.set_byte_limit(240000); + + cudf::io::chunked_parquet_reader reader(in_opts); + + int count{0}; + while (reader.has_next()) { + printf("\n\nhas next %d\n\n", count++); + + auto result = reader.read_chunk(); + printf("Result size: %d\n\n\n\n\n", result.tbl->num_rows()); 
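// Back-of-envelope expectation for this loop, inferred from the page-size comments in
// TestChunkedReadString below rather than asserted by the test itself: with the writer's
// default of 20,000 rows per page, each int32 column contributes 80,000-byte pages, so
// every 20,000 rows of this two-column table decode into roughly 160,000 bytes. Against
// the 240,000-byte limit set above, only one such page pair fits per chunk, so the
// 40,000 input rows should come back as two chunks of about 20,000 rows each.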
+ } +} + +TEST_F(ParquetChunkedReaderTest, TestChunkedReadString) +{ + // values the cudf parquet writer uses + // constexpr size_t default_max_page_size_bytes = 512 * 1024; ///< 512KB per page + // constexpr size_type default_max_page_size_rows = 20000; ///< 20k rows per page + std::mt19937 gen(6542); + std::bernoulli_distribution bn(0.7f); + auto values = thrust::make_counting_iterator(0); + constexpr cudf::size_type num_rows = 60000; + // ints Page total bytes cumulative bytes + // 20000 rows of 4 bytes each = A0 80000 80000 + // 20000 rows of 4 bytes each = A1 80000 160000 + // 20000 rows of 4 bytes each = A2 80000 240000 + cudf::test::fixed_width_column_wrapper a(values, values + num_rows); + // strings Page total bytes cumulative bytes + // 20000 rows of 1 char each (20000 + 80004) = B0 100004 100004 + // 20000 rows of 4 chars each (80000 + 80004) = B1 160004 260008 + // 20000 rows of 16 chars each (320000 + 80004) = B2 400004 660012 + std::vector strings{"a", "bbbb", "cccccccccccccccc"}; + auto const str_iter = cudf::detail::make_counting_transform_iterator(0, [&](int i) { + if (i < 20000) { return strings[0]; } + if (i < 40000) { return strings[1]; } + return strings[2]; + }); + cudf::test::strings_column_wrapper b{str_iter, str_iter + num_rows}; + // cumulative sizes + // A0 + B0 : 180004 + // A1 + B1 : 420008 + // A2 + B2 : 900012 + // skip_rows / num_rows + // chunked_read_size of 500000 should give 2 chunks: {0, 40000}, {40000, 20000} + // chunked_read_size of 1000000 should give 1 chunks: {0, 60000}, + auto write_tbl = cudf::table_view{{a, b}}; + auto filepath = std::string{"/tmp/chunked_splits_strings.parquet"}; + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, write_tbl); + cudf::io::write_parquet(out_opts); + //======================================================================================== + + { + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(in_opts); + printf("Result size read full: %d\n\n\n\n\n", result.tbl->num_rows()); + } + + cudf::io::chunked_parquet_reader_options in_opts = + cudf::io::chunked_parquet_reader_options::builder(cudf::io::source_info{filepath}); + in_opts.set_byte_limit(500000); + + cudf::io::chunked_parquet_reader reader(in_opts); + + int count{0}; + while (reader.has_next()) { + printf("\n\nhas next %d\n\n", count++); + + auto result = reader.read_chunk(); + printf("Result size: %d\n\n\n\n\n", result.tbl->num_rows()); + } +} +#endif From f7018fe0cf6d573566c6de8cf0f9e58f919e7434 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 19 Oct 2022 09:35:10 -0700 Subject: [PATCH 057/162] Reverse `parquet_test.cpp` Signed-off-by: Nghia Truong --- cpp/tests/io/parquet_test.cpp | 4795 ++++++++++++++++++++++++++++++++- 1 file changed, 4697 insertions(+), 98 deletions(-) diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index 7052bc1cbc9..b13e875eabd 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -45,129 +45,4728 @@ #include #include -struct ParquetChunkedReaderTest : public cudf::test::BaseFixture { +template +using column_wrapper = + typename std::conditional, + cudf::test::strings_column_wrapper, + cudf::test::fixed_width_column_wrapper>::type; +using column = cudf::column; +using table = cudf::table; +using table_view = cudf::table_view; + +// Global environment for temporary files +auto const temp_env = 
static_cast( + ::testing::AddGlobalTestEnvironment(new cudf::test::TempDirTestEnvironment)); + +template +std::unique_ptr create_fixed_table(cudf::size_type num_columns, + cudf::size_type num_rows, + bool include_validity, + Elements elements) +{ + auto valids = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i % 2 == 0 ? true : false; }); + std::vector> src_cols(num_columns); + for (int idx = 0; idx < num_columns; idx++) { + if (include_validity) { + src_cols[idx] = + cudf::test::fixed_width_column_wrapper(elements, elements + num_rows, valids); + } else { + src_cols[idx] = cudf::test::fixed_width_column_wrapper(elements, elements + num_rows); + } + } + std::vector> columns(num_columns); + std::transform(src_cols.begin(), + src_cols.end(), + columns.begin(), + [](cudf::test::fixed_width_column_wrapper& in) { + auto ret = in.release(); + // pre-cache the null count + [[maybe_unused]] auto const nulls = ret->has_nulls(); + return ret; + }); + return std::make_unique(std::move(columns)); +} + +template +std::unique_ptr create_random_fixed_table(cudf::size_type num_columns, + cudf::size_type num_rows, + bool include_validity) +{ + auto rand_elements = + cudf::detail::make_counting_transform_iterator(0, [](T i) { return rand(); }); + return create_fixed_table(num_columns, num_rows, include_validity, rand_elements); +} + +template +std::unique_ptr create_compressible_fixed_table(cudf::size_type num_columns, + cudf::size_type num_rows, + cudf::size_type period, + bool include_validity) +{ + auto compressible_elements = + cudf::detail::make_counting_transform_iterator(0, [period](T i) { return i / period; }); + return create_fixed_table(num_columns, num_rows, include_validity, compressible_elements); +} + +// this function replicates the "list_gen" function in +// python/cudf/cudf/tests/test_parquet.py +template +std::unique_ptr make_parquet_list_list_col( + int skip_rows, int num_rows, int lists_per_row, int list_size, bool include_validity) +{ + auto valids = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0 ? 
1 : 0; }); + + // root list + std::vector row_offsets(num_rows + 1); + int row_offset_count = 0; + { + int offset = 0; + for (int idx = 0; idx < (num_rows) + 1; idx++) { + row_offsets[row_offset_count] = offset; + if (!include_validity || valids[idx]) { offset += lists_per_row; } + row_offset_count++; + } + } + cudf::test::fixed_width_column_wrapper offsets(row_offsets.begin(), + row_offsets.begin() + row_offset_count); + + // child list + std::vector child_row_offsets((num_rows * lists_per_row) + 1); + int child_row_offset_count = 0; + { + int offset = 0; + for (int idx = 0; idx < (num_rows * lists_per_row); idx++) { + int row_index = idx / lists_per_row; + if (include_validity && !valids[row_index]) { continue; } + + child_row_offsets[child_row_offset_count] = offset; + offset += list_size; + child_row_offset_count++; + } + child_row_offsets[child_row_offset_count++] = offset; + } + cudf::test::fixed_width_column_wrapper child_offsets( + child_row_offsets.begin(), child_row_offsets.begin() + child_row_offset_count); + + // child values + std::vector child_values(num_rows * lists_per_row * list_size); + T first_child_value_index = skip_rows * lists_per_row * list_size; + int child_value_count = 0; + { + for (int idx = 0; idx < (num_rows * lists_per_row * list_size); idx++) { + int row_index = idx / (lists_per_row * list_size); + + int val = first_child_value_index; + first_child_value_index++; + + if (include_validity && !valids[row_index]) { continue; } + + child_values[child_value_count] = val; + child_value_count++; + } + } + // validity by value instead of index + auto valids2 = cudf::detail::make_counting_transform_iterator( + 0, [list_size](auto i) { return (i % list_size) % 2 == 0 ? 1 : 0; }); + auto child_data = include_validity + ? cudf::test::fixed_width_column_wrapper( + child_values.begin(), child_values.begin() + child_value_count, valids2) + : cudf::test::fixed_width_column_wrapper( + child_values.begin(), child_values.begin() + child_value_count); + + int child_offsets_size = static_cast(child_offsets).size() - 1; + auto child = cudf::make_lists_column( + child_offsets_size, child_offsets.release(), child_data.release(), 0, rmm::device_buffer{}); + + int offsets_size = static_cast(offsets).size() - 1; + return include_validity + ? cudf::make_lists_column( + offsets_size, + offsets.release(), + std::move(child), + cudf::UNKNOWN_NULL_COUNT, + cudf::test::detail::make_null_mask(valids, valids + offsets_size)) + : cudf::make_lists_column( + offsets_size, offsets.release(), std::move(child), 0, rmm::device_buffer{}); +} + +// given a datasource pointing to a parquet file, read the footer +// of the file to populate the FileMetaData pointed to by file_meta_data. +// throws cudf::logic_error if the file or metadata is invalid. 
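The helper defined next leans on the fixed layout of a parquet file's trailer: the thrift-encoded FileMetaData footer is followed by a 4-byte little-endian footer length and the 4-byte magic "PAR1" (the same magic that opens the file). As a standalone illustration, here is a sketch that locates the footer with plain file IO instead of the cudf datasource abstraction; the struct and function names are invented, and it assumes a little-endian host:

#include <cstdint>
#include <fstream>
#include <stdexcept>
#include <string>

struct footer_location {
  std::uint64_t offset;  // byte offset where the thrift-encoded footer starts
  std::uint32_t length;  // footer length in bytes
};

footer_location locate_parquet_footer(std::string const& path)
{
  std::ifstream f(path, std::ios::binary);
  if (!f) { throw std::runtime_error("cannot open file"); }

  f.seekg(0, std::ios::end);
  auto const file_len = static_cast<std::uint64_t>(f.tellg());
  if (file_len < 12) { throw std::runtime_error("file too small to be parquet"); }

  // the last 8 bytes of the file are a 4-byte footer length followed by "PAR1"
  std::uint32_t footer_len = 0;
  char magic[4]            = {};
  f.seekg(static_cast<std::streamoff>(file_len - 8), std::ios::beg);
  f.read(reinterpret_cast<char*>(&footer_len), sizeof(footer_len));
  f.read(magic, sizeof(magic));
  if (std::string(magic, 4) != "PAR1") { throw std::runtime_error("bad parquet magic"); }

  // the footer sits immediately before those trailing 8 bytes
  return {file_len - 8 - footer_len, footer_len};
}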
+void read_footer(const std::unique_ptr& source, + cudf::io::parquet::FileMetaData* file_meta_data) +{ + constexpr auto header_len = sizeof(cudf::io::parquet::file_header_s); + constexpr auto ender_len = sizeof(cudf::io::parquet::file_ender_s); + + const auto len = source->size(); + const auto header_buffer = source->host_read(0, header_len); + const auto header = + reinterpret_cast(header_buffer->data()); + const auto ender_buffer = source->host_read(len - ender_len, ender_len); + const auto ender = reinterpret_cast(ender_buffer->data()); + + // checks for valid header, footer, and file length + CUDF_EXPECTS(len > header_len + ender_len, "Incorrect data source"); + CUDF_EXPECTS(header->magic == cudf::io::parquet::parquet_magic && + ender->magic == cudf::io::parquet::parquet_magic, + "Corrupted header or footer"); + CUDF_EXPECTS(ender->footer_len != 0 && ender->footer_len <= (len - header_len - ender_len), + "Incorrect footer length"); + + // parquet files end with 4-byte footer_length and 4-byte magic == "PAR1" + // seek backwards from the end of the file (footer_length + 8 bytes of ender) + const auto footer_buffer = + source->host_read(len - ender->footer_len - ender_len, ender->footer_len); + cudf::io::parquet::CompactProtocolReader cp(footer_buffer->data(), ender->footer_len); + + // returns true on success + bool res = cp.read(file_meta_data); + CUDF_EXPECTS(res, "Cannot parse file metadata"); +} + +// returns the number of bits used for dictionary encoding data at the given page location. +// this assumes the data is uncompressed. +// throws cudf::logic_error if the page_loc data is invalid. +int read_dict_bits(const std::unique_ptr& source, + const cudf::io::parquet::PageLocation& page_loc) +{ + CUDF_EXPECTS(page_loc.offset > 0, "Cannot find page header"); + CUDF_EXPECTS(page_loc.compressed_page_size > 0, "Invalid page header length"); + + cudf::io::parquet::PageHeader page_hdr; + const auto page_buf = source->host_read(page_loc.offset, page_loc.compressed_page_size); + cudf::io::parquet::CompactProtocolReader cp(page_buf->data(), page_buf->size()); + bool res = cp.read(&page_hdr); + CUDF_EXPECTS(res, "Cannot parse page header"); + + // cp should be pointing at the start of page data now. the first byte + // should be the encoding bit size + return cp.getb(); +} + +// read column index from datasource at location indicated by chunk, +// parse and return as a ColumnIndex struct. +// throws cudf::logic_error if the chunk data is invalid. +cudf::io::parquet::ColumnIndex read_column_index( + const std::unique_ptr& source, const cudf::io::parquet::ColumnChunk& chunk) +{ + CUDF_EXPECTS(chunk.column_index_offset > 0, "Cannot find column index"); + CUDF_EXPECTS(chunk.column_index_length > 0, "Invalid column index length"); + + cudf::io::parquet::ColumnIndex colidx; + const auto ci_buf = source->host_read(chunk.column_index_offset, chunk.column_index_length); + cudf::io::parquet::CompactProtocolReader cp(ci_buf->data(), ci_buf->size()); + bool res = cp.read(&colidx); + CUDF_EXPECTS(res, "Cannot parse column index"); + return colidx; +} + +// read offset index from datasource at location indicated by chunk, +// parse and return as an OffsetIndex struct. +// throws cudf::logic_error if the chunk data is invalid. 
+cudf::io::parquet::OffsetIndex read_offset_index( + const std::unique_ptr& source, const cudf::io::parquet::ColumnChunk& chunk) +{ + CUDF_EXPECTS(chunk.offset_index_offset > 0, "Cannot find offset index"); + CUDF_EXPECTS(chunk.offset_index_length > 0, "Invalid offset index length"); + + cudf::io::parquet::OffsetIndex offidx; + const auto oi_buf = source->host_read(chunk.offset_index_offset, chunk.offset_index_length); + cudf::io::parquet::CompactProtocolReader cp(oi_buf->data(), oi_buf->size()); + bool res = cp.read(&offidx); + CUDF_EXPECTS(res, "Cannot parse offset index"); + return offidx; +} + +// parse the statistics_blob on chunk and return as a Statistics struct. +// throws cudf::logic_error if the chunk statistics_blob is invalid. +cudf::io::parquet::Statistics parse_statistics(const cudf::io::parquet::ColumnChunk& chunk) +{ + auto& stats_blob = chunk.meta_data.statistics_blob; + CUDF_EXPECTS(stats_blob.size() > 0, "Invalid statistics length"); + + cudf::io::parquet::Statistics stats; + cudf::io::parquet::CompactProtocolReader cp(stats_blob.data(), stats_blob.size()); + bool res = cp.read(&stats); + CUDF_EXPECTS(res, "Cannot parse column statistics"); + return stats; +} + +// read page header from datasource at location indicated by page_loc, +// parse and return as a PageHeader struct. +// throws cudf::logic_error if the page_loc data is invalid. +cudf::io::parquet::PageHeader read_page_header(const std::unique_ptr& source, + const cudf::io::parquet::PageLocation& page_loc) +{ + CUDF_EXPECTS(page_loc.offset > 0, "Cannot find page header"); + CUDF_EXPECTS(page_loc.compressed_page_size > 0, "Invalid page header length"); + + cudf::io::parquet::PageHeader page_hdr; + const auto page_buf = source->host_read(page_loc.offset, page_loc.compressed_page_size); + cudf::io::parquet::CompactProtocolReader cp(page_buf->data(), page_buf->size()); + bool res = cp.read(&page_hdr); + CUDF_EXPECTS(res, "Cannot parse page header"); + return page_hdr; +} + +// Base test fixture for tests +struct ParquetWriterTest : public cudf::test::BaseFixture { +}; + +// Base test fixture for tests +struct ParquetReaderTest : public cudf::test::BaseFixture { +}; + +// Base test fixture for "stress" tests +struct ParquetWriterStressTest : public cudf::test::BaseFixture { +}; + +// Typed test fixture for numeric type tests +template +struct ParquetWriterNumericTypeTest : public ParquetWriterTest { + auto type() { return cudf::data_type{cudf::type_to_id()}; } }; -#if 0 -TEST_F(ParquetChunkedReaderTest, Test) +// Typed test fixture for comparable type tests +template +struct ParquetWriterComparableTypeTest : public ParquetWriterTest { + auto type() { return cudf::data_type{cudf::type_to_id()}; } +}; + +// Typed test fixture for timestamp type tests +template +struct ParquetWriterChronoTypeTest : public ParquetWriterTest { + auto type() { return cudf::data_type{cudf::type_to_id()}; } +}; + +// Typed test fixture for timestamp type tests +template +struct ParquetWriterTimestampTypeTest : public ParquetWriterTest { + auto type() { return cudf::data_type{cudf::type_to_id()}; } +}; + +// Typed test fixture for all types +template +struct ParquetWriterSchemaTest : public ParquetWriterTest { + auto type() { return cudf::data_type{cudf::type_to_id()}; } +}; + +// Declare typed test cases +// TODO: Replace with `NumericTypes` when unsigned support is added. 
Issue #5352 +using SupportedTypes = cudf::test::Types; +TYPED_TEST_SUITE(ParquetWriterNumericTypeTest, SupportedTypes); +using ComparableAndFixedTypes = + cudf::test::Concat; +TYPED_TEST_SUITE(ParquetWriterComparableTypeTest, ComparableAndFixedTypes); +TYPED_TEST_SUITE(ParquetWriterChronoTypeTest, cudf::test::ChronoTypes); +using SupportedTimestampTypes = + cudf::test::Types; +TYPED_TEST_SUITE(ParquetWriterTimestampTypeTest, SupportedTimestampTypes); +TYPED_TEST_SUITE(ParquetWriterSchemaTest, cudf::test::AllTypes); + +// Base test fixture for chunked writer tests +struct ParquetChunkedWriterTest : public cudf::test::BaseFixture { +}; + +// Typed test fixture for numeric type tests +template +struct ParquetChunkedWriterNumericTypeTest : public ParquetChunkedWriterTest { + auto type() { return cudf::data_type{cudf::type_to_id()}; } +}; + +// Declare typed test cases +TYPED_TEST_SUITE(ParquetChunkedWriterNumericTypeTest, SupportedTypes); + +// Base test fixture for size-parameterized tests +class ParquetSizedTest : public ::testing::TestWithParam { +}; + +// test the allowed bit widths for dictionary encoding +// values chosen to trigger 1, 2, 3, 4, 5, 6, 8, 10, 12, 16, 20, and 24 bit dictionaries +INSTANTIATE_TEST_SUITE_P(ParquetDictionaryTest, + ParquetSizedTest, + testing::Range(1, 25), + testing::PrintToStringParamName()); + +namespace { +// Generates a vector of uniform random values of type T +template +inline auto random_values(size_t size) +{ + std::vector values(size); + + using T1 = T; + using uniform_distribution = + typename std::conditional_t, + std::bernoulli_distribution, + std::conditional_t, + std::uniform_real_distribution, + std::uniform_int_distribution>>; + + static constexpr auto seed = 0xf00d; + static std::mt19937 engine{seed}; + static uniform_distribution dist{}; + std::generate_n(values.begin(), size, [&]() { return T{dist(engine)}; }); + + return values; +} + +} // namespace + +TYPED_TEST(ParquetWriterNumericTypeTest, SingleColumn) +{ + auto sequence = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return TypeParam(i % 400); }); + auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); + + constexpr auto num_rows = 800; + column_wrapper col(sequence, sequence + num_rows, validity); + + auto expected = table_view{{col}}; + + auto filepath = temp_env->get_temp_filepath("SingleColumn.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); +} + +TYPED_TEST(ParquetWriterNumericTypeTest, SingleColumnWithNulls) +{ + auto sequence = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return TypeParam(i); }); + auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i % 2); }); + + constexpr auto num_rows = 100; + column_wrapper col(sequence, sequence + num_rows, validity); + + auto expected = table_view{{col}}; + + auto filepath = temp_env->get_temp_filepath("SingleColumnWithNulls.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options in_opts = + 
cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); +} + +TYPED_TEST(ParquetWriterChronoTypeTest, Chronos) +{ + auto sequence = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return ((std::rand() / 10000) * 1000); }); + auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); + + constexpr auto num_rows = 100; + column_wrapper col( + sequence, sequence + num_rows, validity); + + auto expected = table_view{{col}}; + + auto filepath = temp_env->get_temp_filepath("Chronos.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .timestamp_type(this->type()); + auto result = cudf::io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); +} + +TYPED_TEST(ParquetWriterChronoTypeTest, ChronosWithNulls) +{ + auto sequence = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return ((std::rand() / 10000) * 1000); }); + auto validity = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i > 30) && (i < 60); }); + + constexpr auto num_rows = 100; + column_wrapper col( + sequence, sequence + num_rows, validity); + + auto expected = table_view{{col}}; + + auto filepath = temp_env->get_temp_filepath("ChronosWithNulls.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .timestamp_type(this->type()); + auto result = cudf::io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); +} + +TYPED_TEST(ParquetWriterTimestampTypeTest, TimestampOverflow) +{ + constexpr int64_t max = std::numeric_limits::max(); + auto sequence = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return max - i; }); + auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); + + constexpr auto num_rows = 100; + column_wrapper col( + sequence, sequence + num_rows, validity); + table_view expected({col}); + + auto filepath = temp_env->get_temp_filepath("ParquetTimestampOverflow.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .timestamp_type(this->type()); + auto result = cudf::io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); +} + +TEST_F(ParquetWriterTest, MultiColumn) +{ + constexpr auto num_rows = 100000; + + // auto col0_data = random_values(num_rows); + auto col1_data = random_values(num_rows); + auto col2_data = random_values(num_rows); + auto col3_data = random_values(num_rows); + auto col4_data = random_values(num_rows); + auto col5_data = random_values(num_rows); + auto col6_vals = random_values(num_rows); + auto col7_vals = random_values(num_rows); + auto col8_vals = random_values(num_rows); + 
auto col6_data = cudf::detail::make_counting_transform_iterator(0, [col6_vals](auto i) { + return numeric::decimal32{col6_vals[i], numeric::scale_type{5}}; + }); + auto col7_data = cudf::detail::make_counting_transform_iterator(0, [col7_vals](auto i) { + return numeric::decimal64{col7_vals[i], numeric::scale_type{-5}}; + }); + auto col8_data = cudf::detail::make_counting_transform_iterator(0, [col8_vals](auto i) { + return numeric::decimal128{col8_vals[i], numeric::scale_type{-6}}; + }); + auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); + + // column_wrapper col0{ + // col0_data.begin(), col0_data.end(), validity}; + column_wrapper col1{col1_data.begin(), col1_data.end(), validity}; + column_wrapper col2{col2_data.begin(), col2_data.end(), validity}; + column_wrapper col3{col3_data.begin(), col3_data.end(), validity}; + column_wrapper col4{col4_data.begin(), col4_data.end(), validity}; + column_wrapper col5{col5_data.begin(), col5_data.end(), validity}; + column_wrapper col6{col6_data, col6_data + num_rows, validity}; + column_wrapper col7{col7_data, col7_data + num_rows, validity}; + column_wrapper col8{col8_data, col8_data + num_rows, validity}; + + auto expected = table_view{{col1, col2, col3, col4, col5, col6, col7, col8}}; + + cudf::io::table_input_metadata expected_metadata(expected); + // expected_metadata.column_metadata[0].set_name( "bools"); + expected_metadata.column_metadata[0].set_name("int8s"); + expected_metadata.column_metadata[1].set_name("int16s"); + expected_metadata.column_metadata[2].set_name("int32s"); + expected_metadata.column_metadata[3].set_name("floats"); + expected_metadata.column_metadata[4].set_name("doubles"); + expected_metadata.column_metadata[5].set_name("decimal32s").set_decimal_precision(10); + expected_metadata.column_metadata[6].set_name("decimal64s").set_decimal_precision(20); + expected_metadata.column_metadata[7].set_name("decimal128s").set_decimal_precision(40); + + auto filepath = temp_env->get_temp_filepath("MultiColumn.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .metadata(&expected_metadata); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); + cudf::test::expect_metadata_equal(expected_metadata, result.metadata); +} + +TEST_F(ParquetWriterTest, MultiColumnWithNulls) +{ + constexpr auto num_rows = 100; + + // auto col0_data = random_values(num_rows); + auto col1_data = random_values(num_rows); + auto col2_data = random_values(num_rows); + auto col3_data = random_values(num_rows); + auto col4_data = random_values(num_rows); + auto col5_data = random_values(num_rows); + auto col6_vals = random_values(num_rows); + auto col7_vals = random_values(num_rows); + auto col6_data = cudf::detail::make_counting_transform_iterator(0, [col6_vals](auto i) { + return numeric::decimal32{col6_vals[i], numeric::scale_type{-2}}; + }); + auto col7_data = cudf::detail::make_counting_transform_iterator(0, [col7_vals](auto i) { + return numeric::decimal64{col7_vals[i], numeric::scale_type{-8}}; + }); + // auto col0_mask = cudf::detail::make_counting_transform_iterator( + // 0, [](auto i) { return (i % 2); }); + auto col1_mask = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i < 
10); }); + auto col2_mask = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); + auto col3_mask = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i == (num_rows - 1)); }); + auto col4_mask = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i >= 40 && i <= 60); }); + auto col5_mask = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i > 80); }); + auto col6_mask = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i % 5); }); + auto col7_mask = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i != 55); }); + + // column_wrapper col0{ + // col0_data.begin(), col0_data.end(), col0_mask}; + column_wrapper col1{col1_data.begin(), col1_data.end(), col1_mask}; + column_wrapper col2{col2_data.begin(), col2_data.end(), col2_mask}; + column_wrapper col3{col3_data.begin(), col3_data.end(), col3_mask}; + column_wrapper col4{col4_data.begin(), col4_data.end(), col4_mask}; + column_wrapper col5{col5_data.begin(), col5_data.end(), col5_mask}; + column_wrapper col6{col6_data, col6_data + num_rows, col6_mask}; + column_wrapper col7{col7_data, col7_data + num_rows, col7_mask}; + + auto expected = table_view{{/*col0, */ col1, col2, col3, col4, col5, col6, col7}}; + + cudf::io::table_input_metadata expected_metadata(expected); + // expected_metadata.column_names.emplace_back("bools"); + expected_metadata.column_metadata[0].set_name("int8s"); + expected_metadata.column_metadata[1].set_name("int16s"); + expected_metadata.column_metadata[2].set_name("int32s"); + expected_metadata.column_metadata[3].set_name("floats"); + expected_metadata.column_metadata[4].set_name("doubles"); + expected_metadata.column_metadata[5].set_name("decimal32s").set_decimal_precision(9); + expected_metadata.column_metadata[6].set_name("decimal64s").set_decimal_precision(20); + + auto filepath = temp_env->get_temp_filepath("MultiColumnWithNulls.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .metadata(&expected_metadata); + + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); + // TODO: Need to be able to return metadata in tree form from reader so they can be compared. + // Unfortunately the closest thing to a hierarchical schema is column_name_info which does not + // have any tests for it c++ or python. 
+ cudf::test::expect_metadata_equal(expected_metadata, result.metadata); +} + +TEST_F(ParquetWriterTest, Strings) +{ + std::vector strings{ + "Monday", "Wȅdnȅsday", "Friday", "Monday", "Friday", "Friday", "Friday", "Funday"}; + const auto num_rows = strings.size(); + + auto seq_col0 = random_values(num_rows); + auto seq_col2 = random_values(num_rows); + auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); + + column_wrapper col0{seq_col0.begin(), seq_col0.end(), validity}; + column_wrapper col1{strings.begin(), strings.end()}; + column_wrapper col2{seq_col2.begin(), seq_col2.end(), validity}; + + auto expected = table_view{{col0, col1, col2}}; + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_name("col_other"); + expected_metadata.column_metadata[1].set_name("col_string"); + expected_metadata.column_metadata[2].set_name("col_another"); + + auto filepath = temp_env->get_temp_filepath("Strings.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .metadata(&expected_metadata); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); + cudf::test::expect_metadata_equal(expected_metadata, result.metadata); +} + +TEST_F(ParquetWriterTest, StringsAsBinary) +{ + std::vector unicode_strings{ + "Monday", "Wȅdnȅsday", "Friday", "Monday", "Friday", "Friday", "Friday", "Funday"}; + std::vector ascii_strings{ + "Monday", "Wednesday", "Friday", "Monday", "Friday", "Friday", "Friday", "Funday"}; + + column_wrapper col0{ascii_strings.begin(), ascii_strings.end()}; + column_wrapper col1{unicode_strings.begin(), unicode_strings.end()}; + column_wrapper col2{ascii_strings.begin(), ascii_strings.end()}; + cudf::test::lists_column_wrapper col3{{'M', 'o', 'n', 'd', 'a', 'y'}, + {'W', 'e', 'd', 'n', 'e', 's', 'd', 'a', 'y'}, + {'F', 'r', 'i', 'd', 'a', 'y'}, + {'M', 'o', 'n', 'd', 'a', 'y'}, + {'F', 'r', 'i', 'd', 'a', 'y'}, + {'F', 'r', 'i', 'd', 'a', 'y'}, + {'F', 'r', 'i', 'd', 'a', 'y'}, + {'F', 'u', 'n', 'd', 'a', 'y'}}; + cudf::test::lists_column_wrapper col4{ + {'M', 'o', 'n', 'd', 'a', 'y'}, + {'W', 200, 133, 'd', 'n', 200, 133, 's', 'd', 'a', 'y'}, + {'F', 'r', 'i', 'd', 'a', 'y'}, + {'M', 'o', 'n', 'd', 'a', 'y'}, + {'F', 'r', 'i', 'd', 'a', 'y'}, + {'F', 'r', 'i', 'd', 'a', 'y'}, + {'F', 'r', 'i', 'd', 'a', 'y'}, + {'F', 'u', 'n', 'd', 'a', 'y'}}; + + auto write_tbl = table_view{{col0, col1, col2, col3, col4}}; + + cudf::io::table_input_metadata expected_metadata(write_tbl); + expected_metadata.column_metadata[0].set_name("col_single").set_output_as_binary(true); + expected_metadata.column_metadata[1].set_name("col_string").set_output_as_binary(true); + expected_metadata.column_metadata[2].set_name("col_another").set_output_as_binary(true); + expected_metadata.column_metadata[3].set_name("col_binary"); + expected_metadata.column_metadata[4].set_name("col_binary"); + + auto filepath = temp_env->get_temp_filepath("BinaryStrings.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, write_tbl) + .metadata(&expected_metadata); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options in_opts = + 
cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .set_column_schema( + {cudf::io::reader_column_schema().set_convert_binary_to_strings(false), + cudf::io::reader_column_schema().set_convert_binary_to_strings(false), + cudf::io::reader_column_schema().set_convert_binary_to_strings(false), + cudf::io::reader_column_schema().add_child(cudf::io::reader_column_schema()), + cudf::io::reader_column_schema().add_child(cudf::io::reader_column_schema())}); + auto result = cudf::io::read_parquet(in_opts); + auto expected = table_view{{col3, col4, col3, col3, col4}}; + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); + cudf::test::expect_metadata_equal(expected_metadata, result.metadata); +} + +TEST_F(ParquetWriterTest, SlicedTable) +{ + // This test checks for writing zero copy, offsetted views into existing cudf tables + + std::vector strings{ + "Monday", "Wȅdnȅsday", "Friday", "Monday", "Friday", "Friday", "Friday", "Funday"}; + const auto num_rows = strings.size(); + + auto seq_col0 = random_values(num_rows); + auto seq_col2 = random_values(num_rows); + auto validity = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 3 != 0; }); + + column_wrapper col0{seq_col0.begin(), seq_col0.end(), validity}; + column_wrapper col1{strings.begin(), strings.end()}; + column_wrapper col2{seq_col2.begin(), seq_col2.end(), validity}; + + using lcw = cudf::test::lists_column_wrapper; + lcw col3{{9, 8}, {7, 6, 5}, {}, {4}, {3, 2, 1, 0}, {20, 21, 22, 23, 24}, {}, {66, 666}}; + + // [[[NULL,2,NULL,4]], [[NULL,6,NULL], [8,9]]] + // [NULL, [[13],[14,15,16]], NULL] + // [NULL, [], NULL, [[]]] + // NULL + // [[[NULL,2,NULL,4]], [[NULL,6,NULL], [8,9]]] + // [NULL, [[13],[14,15,16]], NULL] + // [[[]]] + // [NULL, [], NULL, [[]]] + auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); + auto valids2 = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; }); + lcw col4{{ + {{{{1, 2, 3, 4}, valids}}, {{{5, 6, 7}, valids}, {8, 9}}}, + {{{{10, 11}, {12}}, {{13}, {14, 15, 16}}, {{17, 18}}}, valids}, + {{lcw{lcw{}}, lcw{}, lcw{}, lcw{lcw{}}}, valids}, + lcw{lcw{lcw{}}}, + {{{{1, 2, 3, 4}, valids}}, {{{5, 6, 7}, valids}, {8, 9}}}, + {{{{10, 11}, {12}}, {{13}, {14, 15, 16}}, {{17, 18}}}, valids}, + lcw{lcw{lcw{}}}, + {{lcw{lcw{}}, lcw{}, lcw{}, lcw{lcw{}}}, valids}, + }, + valids2}; + + // Struct column + auto ages_col = cudf::test::fixed_width_column_wrapper{ + {48, 27, 25, 31, 351, 351, 29, 15}, {1, 1, 1, 1, 1, 0, 1, 1}}; + + auto col5 = cudf::test::structs_column_wrapper{{ages_col}, {1, 1, 1, 1, 0, 1, 1, 1}}; + + // Struct/List mixed column + + // [] + // [NULL, 2, NULL] + // [4, 5] + // NULL + // [] + // [7, 8, 9] + // [10] + // [11, 12] + lcw land{{{}, {{1, 2, 3}, valids}, {4, 5}, {}, {}, {7, 8, 9}, {10}, {11, 12}}, valids2}; + + // [] + // [[1, 2, 3], [], [4, 5], [], [0, 6, 0]] + // [[7, 8], []] + // [[]] + // [[]] + // [[], [], []] + // [[10]] + // [[13, 14], [15]] + lcw flats{lcw{}, + {{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, + {{7, 8}, {}}, + lcw{lcw{}}, + lcw{lcw{}}, + lcw{lcw{}, lcw{}, lcw{}}, + {lcw{10}}, + {{13, 14}, {15}}}; + + auto struct_1 = cudf::test::structs_column_wrapper{land, flats}; + auto is_human = cudf::test::fixed_width_column_wrapper{ + {true, true, false, false, true, false, true, false}}; + auto col6 = cudf::test::structs_column_wrapper{{is_human, struct_1}}; + + auto expected = table_view({col0, col1, col2, col3, col4, col5, col6}); + + // auto expected_slice = expected; + 
auto expected_slice = cudf::slice(expected, {2, static_cast(num_rows) - 1}); + + cudf::io::table_input_metadata expected_metadata(expected_slice); + expected_metadata.column_metadata[0].set_name("col_other"); + expected_metadata.column_metadata[1].set_name("col_string"); + expected_metadata.column_metadata[2].set_name("col_another"); + expected_metadata.column_metadata[3].set_name("col_list"); + expected_metadata.column_metadata[4].set_name("col_multi_level_list"); + expected_metadata.column_metadata[5].set_name("col_struct"); + expected_metadata.column_metadata[5].set_name("col_struct_list"); + expected_metadata.column_metadata[6].child(0).set_name("human?"); + expected_metadata.column_metadata[6].child(1).set_name("particulars"); + expected_metadata.column_metadata[6].child(1).child(0).set_name("land"); + expected_metadata.column_metadata[6].child(1).child(1).set_name("flats"); + + auto filepath = temp_env->get_temp_filepath("SlicedTable.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected_slice) + .metadata(&expected_metadata); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_slice, result.tbl->view()); + cudf::test::expect_metadata_equal(expected_metadata, result.metadata); +} + +TEST_F(ParquetWriterTest, ListColumn) +{ + auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); + auto valids2 = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; }); + + using lcw = cudf::test::lists_column_wrapper; + + // [NULL, 2, NULL] + // [] + // [4, 5] + // NULL + lcw col0{{{{1, 2, 3}, valids}, {}, {4, 5}, {}}, valids2}; + + // [[1, 2, 3], [], [4, 5], [], [0, 6, 0]] + // [[7, 8]] + // [] + // [[]] + lcw col1{{{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, {{7, 8}}, lcw{}, lcw{lcw{}}}; + + // [[1, 2, 3], [], [4, 5], NULL, [0, 6, 0]] + // [[7, 8]] + // [] + // [[]] + lcw col2{{{{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, valids2}, {{7, 8}}, lcw{}, lcw{lcw{}}}; + + // [[1, 2, 3], [], [4, 5], NULL, [NULL, 6, NULL]] + // [[7, 8]] + // [] + // [[]] + using dlcw = cudf::test::lists_column_wrapper; + dlcw col3{{{{1., 2., 3.}, {}, {4., 5.}, {}, {{0., 6., 0.}, valids}}, valids2}, + {{7., 8.}}, + dlcw{}, + dlcw{dlcw{}}}; + + // TODO: uint16_t lists are not read properly in parquet reader + // [[1, 2, 3], [], [4, 5], NULL, [0, 6, 0]] + // [[7, 8]] + // [] + // NULL + // using ui16lcw = cudf::test::lists_column_wrapper; + // cudf::test::lists_column_wrapper col4{ + // {{{{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, valids2}, {{7, 8}}, ui16lcw{}, ui16lcw{ui16lcw{}}}, + // valids2}; + + // [[1, 2, 3], [], [4, 5], NULL, [NULL, 6, NULL]] + // [[7, 8]] + // [] + // NULL + lcw col5{ + {{{{1, 2, 3}, {}, {4, 5}, {}, {{0, 6, 0}, valids}}, valids2}, {{7, 8}}, lcw{}, lcw{lcw{}}}, + valids2}; + + using strlcw = cudf::test::lists_column_wrapper; + cudf::test::lists_column_wrapper col6{ + {{"Monday", "Monday", "Friday"}, {}, {"Monday", "Friday"}, {}, {"Sunday", "Funday"}}, + {{"bee", "sting"}}, + strlcw{}, + strlcw{strlcw{}}}; + + // [[[NULL,2,NULL,4]], [[NULL,6,NULL], [8,9]]] + // [NULL, [[13],[14,15,16]], NULL] + // [NULL, [], NULL, [[]]] + // NULL + lcw col7{{ + {{{{1, 2, 3, 4}, valids}}, {{{5, 6, 7}, valids}, {8, 9}}}, + {{{{10, 11}, {12}}, {{13}, {14, 15, 16}}, {{17, 18}}}, valids}, + 
{{lcw{lcw{}}, lcw{}, lcw{}, lcw{lcw{}}}, valids}, + lcw{lcw{lcw{}}}, + }, + valids2}; + + table_view expected({col0, col1, col2, col3, /* col4, */ col5, col6, col7}); + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_name("col_list_int_0"); + expected_metadata.column_metadata[1].set_name("col_list_list_int_1"); + expected_metadata.column_metadata[2].set_name("col_list_list_int_nullable_2"); + expected_metadata.column_metadata[3].set_name("col_list_list_nullable_double_nullable_3"); + // expected_metadata.column_metadata[0].set_name("col_list_list_uint16_4"); + expected_metadata.column_metadata[4].set_name("col_list_nullable_list_nullable_int_nullable_5"); + expected_metadata.column_metadata[5].set_name("col_list_list_string_6"); + expected_metadata.column_metadata[6].set_name("col_list_list_list_7"); + + auto filepath = temp_env->get_temp_filepath("ListColumn.parquet"); + auto out_opts = cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .metadata(&expected_metadata) + .compression(cudf::io::compression_type::NONE); + + cudf::io::write_parquet(out_opts); + + auto in_opts = cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); + cudf::test::expect_metadata_equal(expected_metadata, result.metadata); +} + +TEST_F(ParquetWriterTest, MultiIndex) +{ + constexpr auto num_rows = 100; + + auto col0_data = random_values(num_rows); + auto col1_data = random_values(num_rows); + auto col2_data = random_values(num_rows); + auto col3_data = random_values(num_rows); + auto col4_data = random_values(num_rows); + auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); + + column_wrapper col0{col0_data.begin(), col0_data.end(), validity}; + column_wrapper col1{col1_data.begin(), col1_data.end(), validity}; + column_wrapper col2{col2_data.begin(), col2_data.end(), validity}; + column_wrapper col3{col3_data.begin(), col3_data.end(), validity}; + column_wrapper col4{col4_data.begin(), col4_data.end(), validity}; + + auto expected = table_view{{col0, col1, col2, col3, col4}}; + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_name("int8s"); + expected_metadata.column_metadata[1].set_name("int16s"); + expected_metadata.column_metadata[2].set_name("int32s"); + expected_metadata.column_metadata[3].set_name("floats"); + expected_metadata.column_metadata[4].set_name("doubles"); + + auto filepath = temp_env->get_temp_filepath("MultiIndex.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .metadata(&expected_metadata) + .key_value_metadata( + {{{"pandas", "\"index_columns\": [\"int8s\", \"int16s\"], \"column1\": [\"int32s\"]"}}}); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .use_pandas_metadata(true) + .columns({"int32s", "floats", "doubles"}); + auto result = cudf::io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); + cudf::test::expect_metadata_equal(expected_metadata, result.metadata); +} + +TEST_F(ParquetWriterTest, HostBuffer) +{ + constexpr auto num_rows = 100 << 10; + const auto seq_col = random_values(num_rows); + const auto validity = + 
cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); + column_wrapper col{seq_col.begin(), seq_col.end(), validity}; + + const auto expected = table_view{{col}}; + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_name("col_other"); + + std::vector out_buffer; + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer), expected) + .metadata(&expected_metadata); + cudf::io::write_parquet(out_opts); + cudf::io::parquet_reader_options in_opts = cudf::io::parquet_reader_options::builder( + cudf::io::source_info(out_buffer.data(), out_buffer.size())); + const auto result = cudf::io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); + cudf::test::expect_metadata_equal(expected_metadata, result.metadata); +} + +TEST_F(ParquetWriterTest, NonNullable) +{ + srand(31337); + auto expected = create_random_fixed_table(9, 9, false); + + auto filepath = temp_env->get_temp_filepath("NonNullable.parquet"); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected); + cudf::io::write_parquet(args); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); +} + +TEST_F(ParquetWriterTest, Struct) +{ + // Struct> + + auto names = {"Samuel Vimes", + "Carrot Ironfoundersson", + "Angua von Uberwald", + "Cheery Littlebottom", + "Detritus", + "Mr Slant"}; + + // `Name` column has all valid values. + auto names_col = cudf::test::strings_column_wrapper{names.begin(), names.end()}; + + auto ages_col = + cudf::test::fixed_width_column_wrapper{{48, 27, 25, 31, 351, 351}, {1, 1, 1, 1, 1, 0}}; + + auto struct_1 = cudf::test::structs_column_wrapper{{names_col, ages_col}, {1, 1, 1, 1, 0, 1}}; + + auto is_human_col = cudf::test::fixed_width_column_wrapper{ + {true, true, false, false, false, false}, {1, 1, 0, 1, 1, 0}}; + + auto struct_2 = + cudf::test::structs_column_wrapper{{is_human_col, struct_1}, {0, 1, 1, 1, 1, 1}}.release(); + + auto expected = table_view({*struct_2}); + + auto filepath = temp_env->get_temp_filepath("Struct.parquet"); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_parquet(args); + + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath)); + cudf::io::read_parquet(read_args); +} + +TEST_F(ParquetWriterTest, StructOfList) +{ + // Struct>, + // flats:List> + // > + // > + + auto weights_col = cudf::test::fixed_width_column_wrapper{1.1, 2.4, 5.3, 8.0, 9.6, 6.9}; + + auto ages_col = + cudf::test::fixed_width_column_wrapper{{48, 27, 25, 31, 351, 351}, {1, 1, 1, 1, 1, 0}}; + + auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); + auto valids2 = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; }); + + using lcw = cudf::test::lists_column_wrapper; + + // [] + // [NULL, 2, NULL] + // [4, 5] + // NULL + // [] + // [7, 8, 9] + lcw land_unit{{{}, {{1, 2, 3}, valids}, {4, 5}, {}, {}, {7, 8, 9}}, valids2}; + + // [] + // [[1, 2, 3], [], [4, 5], [], [0, 6, 0]] + // [[7, 8], []] + // [[]] + // [[]] + // [[], [], []] + lcw flats{lcw{}, + {{1, 2, 3}, {}, {4, 5}, 
{}, {0, 6, 0}}, + {{7, 8}, {}}, + lcw{lcw{}}, + lcw{lcw{}}, + lcw{lcw{}, lcw{}, lcw{}}}; + + auto struct_1 = cudf::test::structs_column_wrapper{{weights_col, ages_col, land_unit, flats}, + {1, 1, 1, 1, 0, 1}}; + + auto is_human_col = cudf::test::fixed_width_column_wrapper{ + {true, true, false, false, false, false}, {1, 1, 0, 1, 1, 0}}; + + auto struct_2 = + cudf::test::structs_column_wrapper{{is_human_col, struct_1}, {0, 1, 1, 1, 1, 1}}.release(); + + auto expected = table_view({*struct_2}); + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_name("being"); + expected_metadata.column_metadata[0].child(0).set_name("human?"); + expected_metadata.column_metadata[0].child(1).set_name("particulars"); + expected_metadata.column_metadata[0].child(1).child(0).set_name("weight"); + expected_metadata.column_metadata[0].child(1).child(1).set_name("age"); + expected_metadata.column_metadata[0].child(1).child(2).set_name("land_unit"); + expected_metadata.column_metadata[0].child(1).child(3).set_name("flats"); + + auto filepath = temp_env->get_temp_filepath("StructOfList.parquet"); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .metadata(&expected_metadata); + cudf::io::write_parquet(args); + + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath)); + const auto result = cudf::io::read_parquet(read_args); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); + cudf::test::expect_metadata_equal(expected_metadata, result.metadata); +} + +TEST_F(ParquetWriterTest, ListOfStruct) +{ + // List + // > + // > + + auto weight_col = cudf::test::fixed_width_column_wrapper{1.1, 2.4, 5.3, 8.0, 9.6, 6.9}; + + auto ages_col = + cudf::test::fixed_width_column_wrapper{{48, 27, 25, 31, 351, 351}, {1, 1, 1, 1, 1, 0}}; + + auto struct_1 = cudf::test::structs_column_wrapper{{weight_col, ages_col}, {1, 1, 1, 1, 0, 1}}; + + auto is_human_col = cudf::test::fixed_width_column_wrapper{ + {true, true, false, false, false, false}, {1, 1, 0, 1, 1, 0}}; + + auto struct_2 = + cudf::test::structs_column_wrapper{{is_human_col, struct_1}, {0, 1, 1, 1, 1, 1}}.release(); + + auto list_offsets_column = + cudf::test::fixed_width_column_wrapper{0, 2, 5, 5, 6}.release(); + auto num_list_rows = list_offsets_column->size() - 1; + + auto list_col = cudf::make_lists_column(num_list_rows, + std::move(list_offsets_column), + std::move(struct_2), + cudf::UNKNOWN_NULL_COUNT, + {}); + + auto expected = table_view({*list_col}); + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_name("family"); + expected_metadata.column_metadata[0].child(1).child(0).set_name("human?"); + expected_metadata.column_metadata[0].child(1).child(1).set_name("particulars"); + expected_metadata.column_metadata[0].child(1).child(1).child(0).set_name("weight"); + expected_metadata.column_metadata[0].child(1).child(1).child(1).set_name("age"); + + auto filepath = temp_env->get_temp_filepath("ListOfStruct.parquet"); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .metadata(&expected_metadata); + cudf::io::write_parquet(args); + + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath)); + const auto result = cudf::io::read_parquet(read_args); + + 
CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); + cudf::test::expect_metadata_equal(expected_metadata, result.metadata); +} + +// custom data sink that supports device writes. uses plain file io. +class custom_test_data_sink : public cudf::io::data_sink { + public: + explicit custom_test_data_sink(std::string const& filepath) + { + outfile_.open(filepath, std::ios::out | std::ios::binary | std::ios::trunc); + CUDF_EXPECTS(outfile_.is_open(), "Cannot open output file"); + } + + virtual ~custom_test_data_sink() { flush(); } + + void host_write(void const* data, size_t size) override + { + outfile_.write(static_cast(data), size); + } + + [[nodiscard]] bool supports_device_write() const override { return true; } + + void device_write(void const* gpu_data, size_t size, rmm::cuda_stream_view stream) override + { + this->device_write_async(gpu_data, size, stream).get(); + } + + std::future device_write_async(void const* gpu_data, + size_t size, + rmm::cuda_stream_view stream) override + { + return std::async(std::launch::deferred, [=] { + char* ptr = nullptr; + CUDF_CUDA_TRY(cudaMallocHost(&ptr, size)); + CUDF_CUDA_TRY(cudaMemcpyAsync(ptr, gpu_data, size, cudaMemcpyDeviceToHost, stream.value())); + stream.synchronize(); + outfile_.write(ptr, size); + CUDF_CUDA_TRY(cudaFreeHost(ptr)); + }); + } + + void flush() override { outfile_.flush(); } + + size_t bytes_written() override { return outfile_.tellp(); } + + private: + std::ofstream outfile_; +}; + +TEST_F(ParquetWriterTest, CustomDataSink) +{ + auto filepath = temp_env->get_temp_filepath("CustomDataSink.parquet"); + custom_test_data_sink custom_sink(filepath); + + srand(31337); + auto expected = create_random_fixed_table(5, 10, false); + + // write out using the custom sink + { + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); + cudf::io::write_parquet(args); + } + + // write out using a memmapped sink + std::vector buf_sink; + { + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&buf_sink}, *expected); + cudf::io::write_parquet(args); + } + + // read them back in and make sure everything matches + + cudf::io::parquet_reader_options custom_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto custom_tbl = cudf::io::read_parquet(custom_args); + CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); + + cudf::io::parquet_reader_options buf_args = cudf::io::parquet_reader_options::builder( + cudf::io::source_info{buf_sink.data(), buf_sink.size()}); + auto buf_tbl = cudf::io::read_parquet(buf_args); + CUDF_TEST_EXPECT_TABLES_EQUAL(buf_tbl.tbl->view(), expected->view()); +} + +TEST_F(ParquetWriterTest, DeviceWriteLargeishFile) +{ + auto filepath = temp_env->get_temp_filepath("DeviceWriteLargeishFile.parquet"); + custom_test_data_sink custom_sink(filepath); + + // exercises multiple rowgroups + srand(31337); + auto expected = create_random_fixed_table(4, 4 * 1024 * 1024, false); + + // write out using the custom sink (which uses device writes) + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); + cudf::io::write_parquet(args); + + cudf::io::parquet_reader_options custom_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto custom_tbl = cudf::io::read_parquet(custom_args); + CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), 
expected->view()); +} + +TEST_F(ParquetWriterTest, PartitionedWrite) +{ + auto source = create_compressible_fixed_table(16, 4 * 1024 * 1024, 1000, false); + + auto filepath1 = temp_env->get_temp_filepath("PartitionedWrite1.parquet"); + auto filepath2 = temp_env->get_temp_filepath("PartitionedWrite2.parquet"); + + auto partition1 = cudf::io::partition_info{10, 1024 * 1024}; + auto partition2 = cudf::io::partition_info{20 * 1024 + 7, 3 * 1024 * 1024}; + + auto expected1 = + cudf::slice(*source, {partition1.start_row, partition1.start_row + partition1.num_rows}); + auto expected2 = + cudf::slice(*source, {partition2.start_row, partition2.start_row + partition2.num_rows}); + + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder( + cudf::io::sink_info(std::vector{filepath1, filepath2}), *source) + .partitions({partition1, partition2}) + .compression(cudf::io::compression_type::NONE); + cudf::io::write_parquet(args); + + auto result1 = cudf::io::read_parquet( + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath1))); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected1, result1.tbl->view()); + + auto result2 = cudf::io::read_parquet( + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath2))); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected2, result2.tbl->view()); +} + +TEST_F(ParquetWriterTest, PartitionedWriteEmptyPartitions) +{ + auto source = create_random_fixed_table(4, 4, false); + + auto filepath1 = temp_env->get_temp_filepath("PartitionedWrite1.parquet"); + auto filepath2 = temp_env->get_temp_filepath("PartitionedWrite2.parquet"); + + auto partition1 = cudf::io::partition_info{1, 0}; + auto partition2 = cudf::io::partition_info{1, 0}; + + auto expected1 = + cudf::slice(*source, {partition1.start_row, partition1.start_row + partition1.num_rows}); + auto expected2 = + cudf::slice(*source, {partition2.start_row, partition2.start_row + partition2.num_rows}); + + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder( + cudf::io::sink_info(std::vector{filepath1, filepath2}), *source) + .partitions({partition1, partition2}) + .compression(cudf::io::compression_type::NONE); + cudf::io::write_parquet(args); + + auto result1 = cudf::io::read_parquet( + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath1))); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected1, result1.tbl->view()); + + auto result2 = cudf::io::read_parquet( + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath2))); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected2, result2.tbl->view()); +} + +TEST_F(ParquetWriterTest, PartitionedWriteEmptyColumns) +{ + auto source = create_random_fixed_table(0, 4, false); + + auto filepath1 = temp_env->get_temp_filepath("PartitionedWrite1.parquet"); + auto filepath2 = temp_env->get_temp_filepath("PartitionedWrite2.parquet"); + + auto partition1 = cudf::io::partition_info{1, 0}; + auto partition2 = cudf::io::partition_info{1, 0}; + + auto expected1 = + cudf::slice(*source, {partition1.start_row, partition1.start_row + partition1.num_rows}); + auto expected2 = + cudf::slice(*source, {partition2.start_row, partition2.start_row + partition2.num_rows}); + + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder( + cudf::io::sink_info(std::vector{filepath1, filepath2}), *source) + .partitions({partition1, partition2}) + .compression(cudf::io::compression_type::NONE); + cudf::io::write_parquet(args); + + auto result1 = cudf::io::read_parquet( + 
cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath1))); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected1, result1.tbl->view()); + + auto result2 = cudf::io::read_parquet( + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath2))); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected2, result2.tbl->view()); +} + +template +std::string create_parquet_file(int num_cols) +{ + srand(31337); + auto const table = create_random_fixed_table(num_cols, 10, true); + auto const filepath = + temp_env->get_temp_filepath(typeid(T).name() + std::to_string(num_cols) + ".parquet"); + cudf::io::parquet_writer_options const out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, table->view()); + cudf::io::write_parquet(out_opts); + return filepath; +} + +TEST_F(ParquetWriterTest, MultipleMismatchedSources) +{ + auto const int5file = create_parquet_file(5); + { + auto const float5file = create_parquet_file(5); + std::vector files{int5file, float5file}; + cudf::io::parquet_reader_options const read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{files}); + EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error); + } + { + auto const int10file = create_parquet_file(10); + std::vector files{int5file, int10file}; + cudf::io::parquet_reader_options const read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{files}); + EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error); + } +} + +TEST_F(ParquetWriterTest, Slice) +{ + auto col = + cudf::test::fixed_width_column_wrapper{{1, 2, 3, 4, 5}, {true, true, true, false, true}}; + std::vector indices{2, 5}; + std::vector result = cudf::slice(col, indices); + cudf::table_view tbl{result}; + + auto filepath = temp_env->get_temp_filepath("Slice.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto read_table = cudf::io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(read_table.tbl->view(), tbl); +} + +TEST_F(ParquetChunkedWriterTest, SingleTable) +{ + srand(31337); + auto table1 = create_random_fixed_table(5, 5, true); + + auto filepath = temp_env->get_temp_filepath("ChunkedSingle.parquet"); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer(args).write(*table1); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *table1); +} + +TEST_F(ParquetChunkedWriterTest, SimpleTable) +{ + srand(31337); + auto table1 = create_random_fixed_table(5, 5, true); + auto table2 = create_random_fixed_table(5, 5, true); + + auto full_table = cudf::concatenate(std::vector({*table1, *table2})); + + auto filepath = temp_env->get_temp_filepath("ChunkedSimple.parquet"); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer(args).write(*table1).write(*table2); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = 
cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); +} + +TEST_F(ParquetChunkedWriterTest, LargeTables) +{ + srand(31337); + auto table1 = create_random_fixed_table(512, 4096, true); + auto table2 = create_random_fixed_table(512, 8192, true); + + auto full_table = cudf::concatenate(std::vector({*table1, *table2})); + + auto filepath = temp_env->get_temp_filepath("ChunkedLarge.parquet"); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + auto md = cudf::io::parquet_chunked_writer(args).write(*table1).write(*table2).close(); + CUDF_EXPECTS(!md, "The return value should be null."); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); +} + +TEST_F(ParquetChunkedWriterTest, ManyTables) +{ + srand(31337); + std::vector> tables; + std::vector table_views; + constexpr int num_tables = 96; + for (int idx = 0; idx < num_tables; idx++) { + auto tbl = create_random_fixed_table(16, 64, true); + table_views.push_back(*tbl); + tables.push_back(std::move(tbl)); + } + + auto expected = cudf::concatenate(table_views); + + auto filepath = temp_env->get_temp_filepath("ChunkedManyTables.parquet"); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer writer(args); + std::for_each(table_views.begin(), table_views.end(), [&writer](table_view const& tbl) { + writer.write(tbl); + }); + auto md = writer.close({"dummy/path"}); + CUDF_EXPECTS(md, "The returned metadata should not be null."); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); +} + +TEST_F(ParquetChunkedWriterTest, Strings) +{ + std::vector> cols; + + bool mask1[] = {true, true, false, true, true, true, true}; + std::vector h_strings1{"four", "score", "and", "seven", "years", "ago", "abcdefgh"}; + cudf::test::strings_column_wrapper strings1(h_strings1.begin(), h_strings1.end(), mask1); + cols.push_back(strings1.release()); + cudf::table tbl1(std::move(cols)); + + bool mask2[] = {false, true, true, true, true, true, true}; + std::vector h_strings2{"ooooo", "ppppppp", "fff", "j", "cccc", "bbb", "zzzzzzzzzzz"}; + cudf::test::strings_column_wrapper strings2(h_strings2.begin(), h_strings2.end(), mask2); + cols.push_back(strings2.release()); + cudf::table tbl2(std::move(cols)); + + auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); + + auto filepath = temp_env->get_temp_filepath("ChunkedStrings.parquet"); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer(args).write(tbl1).write(tbl2); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); +} + +TEST_F(ParquetChunkedWriterTest, ListColumn) +{ + auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); + auto valids2 = 
cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; }); + + using lcw = cudf::test::lists_column_wrapper; + + // COL0 (Same nullability) ==================== + // [NULL, 2, NULL] + // [] + // [4, 5] + // NULL + lcw col0_tbl0{{{{1, 2, 3}, valids}, {}, {4, 5}, {}}, valids2}; + + // [7, 8, 9] + // [] + // [NULL, 11] + // NULL + lcw col0_tbl1{{{7, 8, 9}, {}, {{10, 11}, valids}, {}}, valids2}; + + // COL1 (Nullability different in different chunks, test of merging nullability in writer) + // [NULL, 2, NULL] + // [] + // [4, 5] + // [] + lcw col1_tbl0{{{1, 2, 3}, valids}, {}, {4, 5}, {}}; + + // [7, 8, 9] + // [] + // [10, 11] + // NULL + lcw col1_tbl1{{{7, 8, 9}, {}, {10, 11}, {}}, valids2}; + + // COL2 (non-nested columns to test proper schema construction) + size_t num_rows_tbl0 = static_cast(col0_tbl0).size(); + size_t num_rows_tbl1 = static_cast(col0_tbl1).size(); + auto seq_col0 = random_values(num_rows_tbl0); + auto seq_col1 = random_values(num_rows_tbl1); + + column_wrapper col2_tbl0{seq_col0.begin(), seq_col0.end(), valids}; + column_wrapper col2_tbl1{seq_col1.begin(), seq_col1.end(), valids2}; + + auto tbl0 = table_view({col0_tbl0, col1_tbl0, col2_tbl0}); + auto tbl1 = table_view({col0_tbl1, col1_tbl1, col2_tbl1}); + + auto expected = cudf::concatenate(std::vector({tbl0, tbl1})); + + auto filepath = temp_env->get_temp_filepath("ChunkedLists.parquet"); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer(args).write(tbl0).write(tbl1); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); +} + +TEST_F(ParquetChunkedWriterTest, ListOfStruct) +{ + // Table 1 + auto weight_1 = cudf::test::fixed_width_column_wrapper{{57.5, 51.1, 15.3}}; + auto ages_1 = cudf::test::fixed_width_column_wrapper{{30, 27, 5}}; + auto struct_1_1 = cudf::test::structs_column_wrapper{weight_1, ages_1}; + auto is_human_1 = cudf::test::fixed_width_column_wrapper{{true, true, false}}; + auto struct_2_1 = cudf::test::structs_column_wrapper{{is_human_1, struct_1_1}}; + + auto list_offsets_column_1 = + cudf::test::fixed_width_column_wrapper{0, 2, 3, 3}.release(); + auto num_list_rows_1 = list_offsets_column_1->size() - 1; + + auto list_col_1 = cudf::make_lists_column(num_list_rows_1, + std::move(list_offsets_column_1), + struct_2_1.release(), + cudf::UNKNOWN_NULL_COUNT, + {}); + + auto table_1 = table_view({*list_col_1}); + + // Table 2 + auto weight_2 = cudf::test::fixed_width_column_wrapper{{1.1, -1.0, -1.0}}; + auto ages_2 = cudf::test::fixed_width_column_wrapper{{31, 351, 351}, {1, 1, 0}}; + auto struct_1_2 = cudf::test::structs_column_wrapper{{weight_2, ages_2}, {1, 0, 1}}; + auto is_human_2 = cudf::test::fixed_width_column_wrapper{{false, false, false}, {1, 1, 0}}; + auto struct_2_2 = cudf::test::structs_column_wrapper{{is_human_2, struct_1_2}}; + + auto list_offsets_column_2 = + cudf::test::fixed_width_column_wrapper{0, 1, 2, 3}.release(); + auto num_list_rows_2 = list_offsets_column_2->size() - 1; + + auto list_col_2 = cudf::make_lists_column(num_list_rows_2, + std::move(list_offsets_column_2), + struct_2_2.release(), + cudf::UNKNOWN_NULL_COUNT, + {}); + + auto table_2 = table_view({*list_col_2}); + + auto full_table = cudf::concatenate(std::vector({table_1, table_2})); + + 
cudf::io::table_input_metadata expected_metadata(table_1); + expected_metadata.column_metadata[0].set_name("family"); + expected_metadata.column_metadata[0].child(1).set_nullability(false); + expected_metadata.column_metadata[0].child(1).child(0).set_name("human?"); + expected_metadata.column_metadata[0].child(1).child(1).set_name("particulars"); + expected_metadata.column_metadata[0].child(1).child(1).child(0).set_name("weight"); + expected_metadata.column_metadata[0].child(1).child(1).child(1).set_name("age"); + + auto filepath = temp_env->get_temp_filepath("ChunkedListOfStruct.parquet"); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + args.set_metadata(&expected_metadata); + cudf::io::parquet_chunked_writer(args).write(table_1).write(table_2); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *full_table); + cudf::test::expect_metadata_equal(expected_metadata, result.metadata); +} + +TEST_F(ParquetChunkedWriterTest, ListOfStructOfStructOfListOfList) +{ + auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); + auto valids2 = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; }); + + using lcw = cudf::test::lists_column_wrapper; + + // Table 1 =========================== + + // [] + // [NULL, 2, NULL] + // [4, 5] + // NULL + lcw land_1{{{}, {{1, 2, 3}, valids}, {4, 5}, {}}, valids2}; + + // [] + // [[1, 2, 3], [], [4, 5], [], [0, 6, 0]] + // [[7, 8], []] + // [[]] + lcw flats_1{lcw{}, {{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, {{7, 8}, {}}, lcw{lcw{}}}; + + auto weight_1 = cudf::test::fixed_width_column_wrapper{{57.5, 51.1, 15.3, 1.1}}; + auto ages_1 = cudf::test::fixed_width_column_wrapper{{30, 27, 5, 31}}; + auto struct_1_1 = cudf::test::structs_column_wrapper{weight_1, ages_1, land_1, flats_1}; + auto is_human_1 = cudf::test::fixed_width_column_wrapper{{true, true, false, false}}; + auto struct_2_1 = cudf::test::structs_column_wrapper{{is_human_1, struct_1_1}}; + + auto list_offsets_column_1 = + cudf::test::fixed_width_column_wrapper{0, 2, 3, 4}.release(); + auto num_list_rows_1 = list_offsets_column_1->size() - 1; + + auto list_col_1 = cudf::make_lists_column(num_list_rows_1, + std::move(list_offsets_column_1), + struct_2_1.release(), + cudf::UNKNOWN_NULL_COUNT, + {}); + + auto table_1 = table_view({*list_col_1}); + + // Table 2 =========================== + + // [] + // [7, 8, 9] + lcw land_2{{}, {7, 8, 9}}; + + // [[]] + // [[], [], []] + lcw flats_2{lcw{lcw{}}, lcw{lcw{}, lcw{}, lcw{}}}; + + auto weight_2 = cudf::test::fixed_width_column_wrapper{{-1.0, -1.0}}; + auto ages_2 = cudf::test::fixed_width_column_wrapper{{351, 351}, {1, 0}}; + auto struct_1_2 = cudf::test::structs_column_wrapper{{weight_2, ages_2, land_2, flats_2}, {0, 1}}; + auto is_human_2 = cudf::test::fixed_width_column_wrapper{{false, false}, {1, 0}}; + auto struct_2_2 = cudf::test::structs_column_wrapper{{is_human_2, struct_1_2}}; + + auto list_offsets_column_2 = + cudf::test::fixed_width_column_wrapper{0, 1, 2}.release(); + auto num_list_rows_2 = list_offsets_column_2->size() - 1; + + auto list_col_2 = cudf::make_lists_column(num_list_rows_2, + std::move(list_offsets_column_2), + struct_2_2.release(), + cudf::UNKNOWN_NULL_COUNT, + {}); + + auto table_2 = table_view({*list_col_2}); + + 
auto full_table = cudf::concatenate(std::vector({table_1, table_2})); + + cudf::io::table_input_metadata expected_metadata(table_1); + expected_metadata.column_metadata[0].set_name("family"); + expected_metadata.column_metadata[0].child(1).set_nullability(false); + expected_metadata.column_metadata[0].child(1).child(0).set_name("human?"); + expected_metadata.column_metadata[0].child(1).child(1).set_name("particulars"); + expected_metadata.column_metadata[0].child(1).child(1).child(0).set_name("weight"); + expected_metadata.column_metadata[0].child(1).child(1).child(1).set_name("age"); + expected_metadata.column_metadata[0].child(1).child(1).child(2).set_name("land_unit"); + expected_metadata.column_metadata[0].child(1).child(1).child(3).set_name("flats"); + + auto filepath = temp_env->get_temp_filepath("ListOfStructOfStructOfListOfList.parquet"); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + args.set_metadata(&expected_metadata); + cudf::io::parquet_chunked_writer(args).write(table_1).write(table_2); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *full_table); + cudf::test::expect_metadata_equal(expected_metadata, result.metadata); + + // We specifically mentioned in input schema that struct_2 is non-nullable across chunked calls. + auto result_parent_list = result.tbl->get_column(0); + auto result_struct_2 = result_parent_list.child(cudf::lists_column_view::child_column_index); + EXPECT_EQ(result_struct_2.nullable(), false); +} + +TEST_F(ParquetChunkedWriterTest, MismatchedTypes) +{ + srand(31337); + auto table1 = create_random_fixed_table(4, 4, true); + auto table2 = create_random_fixed_table(4, 4, true); + + auto filepath = temp_env->get_temp_filepath("ChunkedMismatchedTypes.parquet"); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer writer(args); + writer.write(*table1); + EXPECT_THROW(writer.write(*table2), cudf::logic_error); + writer.close(); +} + +TEST_F(ParquetChunkedWriterTest, ChunkedWriteAfterClosing) +{ + srand(31337); + auto table = create_random_fixed_table(4, 4, true); + + auto filepath = temp_env->get_temp_filepath("ChunkedWriteAfterClosing.parquet"); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer writer(args); + writer.write(*table).close(); + EXPECT_THROW(writer.write(*table), cudf::logic_error); +} + +TEST_F(ParquetChunkedWriterTest, ReadingUnclosedFile) +{ + srand(31337); + auto table = create_random_fixed_table(4, 4, true); + + auto filepath = temp_env->get_temp_filepath("ReadingUnclosedFile.parquet"); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer writer(args); + writer.write(*table); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error); +} + +TEST_F(ParquetChunkedWriterTest, MismatchedStructure) +{ + srand(31337); + auto table1 = create_random_fixed_table(4, 4, true); + auto table2 = 
create_random_fixed_table(3, 4, true); + + auto filepath = temp_env->get_temp_filepath("ChunkedMismatchedStructure.parquet"); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer writer(args); + writer.write(*table1); + EXPECT_THROW(writer.write(*table2), cudf::logic_error); + writer.close(); +} + +TEST_F(ParquetChunkedWriterTest, MismatchedStructureList) +{ + auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); + auto valids2 = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; }); + + using lcw = cudf::test::lists_column_wrapper; + + // COL0 (mismatched depth) ==================== + // [NULL, 2, NULL] + // [] + // [4, 5] + // NULL + lcw col00{{{{1, 2, 3}, valids}, {}, {4, 5}, {}}, valids2}; + + // [[1, 2, 3], [], [4, 5], [], [0, 6, 0]] + // [[7, 8]] + // [] + // [[]] + lcw col01{{{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, {{7, 8}}, lcw{}, lcw{lcw{}}}; + + // COL2 (non-nested columns to test proper schema construction) + size_t num_rows = static_cast(col00).size(); + auto seq_col0 = random_values(num_rows); + auto seq_col1 = random_values(num_rows); + + column_wrapper col10{seq_col0.begin(), seq_col0.end(), valids}; + column_wrapper col11{seq_col1.begin(), seq_col1.end(), valids2}; + + auto tbl0 = table_view({col00, col10}); + auto tbl1 = table_view({col01, col11}); + + auto filepath = temp_env->get_temp_filepath("ChunkedLists.parquet"); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer writer(args); + writer.write(tbl0); + EXPECT_THROW(writer.write(tbl1), cudf::logic_error); +} + +TEST_F(ParquetChunkedWriterTest, DifferentNullability) +{ + srand(31337); + auto table1 = create_random_fixed_table(5, 5, true); + auto table2 = create_random_fixed_table(5, 5, false); + + auto full_table = cudf::concatenate(std::vector({*table1, *table2})); + + auto filepath = temp_env->get_temp_filepath("ChunkedNullable.parquet"); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer(args).write(*table1).write(*table2); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); +} + +TEST_F(ParquetChunkedWriterTest, DifferentNullabilityStruct) +{ + // Struct, + // age:int + // > (nullable) + // > (non-nullable) + + // Table 1: is_human and struct_1 are non-nullable but should be nullable when read back. + auto weight_1 = cudf::test::fixed_width_column_wrapper{{57.5, 51.1, 15.3}}; + auto ages_1 = cudf::test::fixed_width_column_wrapper{{30, 27, 5}}; + auto struct_1_1 = cudf::test::structs_column_wrapper{weight_1, ages_1}; + auto is_human_1 = cudf::test::fixed_width_column_wrapper{{true, true, false}}; + auto struct_2_1 = cudf::test::structs_column_wrapper{{is_human_1, struct_1_1}}; + auto table_1 = cudf::table_view({struct_2_1}); + + // Table 2: struct_1 and is_human are nullable now so if we hadn't assumed worst case (nullable) + // when writing table_1, we would have wrong pages for it. 
+
+ auto weight_2 = cudf::test::fixed_width_column_wrapper<float>{{1.1, -1.0, -1.0}};
+ auto ages_2 = cudf::test::fixed_width_column_wrapper<int>{{31, 351, 351}, {1, 1, 0}};
+ auto struct_1_2 = cudf::test::structs_column_wrapper{{weight_2, ages_2}, {1, 0, 1}};
+ auto is_human_2 = cudf::test::fixed_width_column_wrapper<bool>{{false, false, false}, {1, 1, 0}};
+ auto struct_2_2 = cudf::test::structs_column_wrapper{{is_human_2, struct_1_2}};
+ auto table_2 = cudf::table_view({struct_2_2});
+
+ auto full_table = cudf::concatenate(std::vector<table_view>({table_1, table_2}));
+
+ cudf::io::table_input_metadata expected_metadata(table_1);
+ expected_metadata.column_metadata[0].set_name("being");
+ expected_metadata.column_metadata[0].child(0).set_name("human?");
+ expected_metadata.column_metadata[0].child(1).set_name("particulars");
+ expected_metadata.column_metadata[0].child(1).child(0).set_name("weight");
+ expected_metadata.column_metadata[0].child(1).child(1).set_name("age");
+
+ auto filepath = temp_env->get_temp_filepath("ChunkedNullableStruct.parquet");
+ cudf::io::chunked_parquet_writer_options args =
+ cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath});
+ args.set_metadata(&expected_metadata);
+ cudf::io::parquet_chunked_writer(args).write(table_1).write(table_2);
+
+ cudf::io::parquet_reader_options read_opts =
+ cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
+ auto result = cudf::io::read_parquet(read_opts);
+
+ CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *full_table);
+ cudf::test::expect_metadata_equal(expected_metadata, result.metadata);
+}
+
+TEST_F(ParquetChunkedWriterTest, ForcedNullability)
+{
+ srand(31337);
+ auto table1 = create_random_fixed_table<int>(5, 5, false);
+ auto table2 = create_random_fixed_table<int>(5, 5, false);
+
+ auto full_table = cudf::concatenate(std::vector<table_view>({*table1, *table2}));
+
+ auto filepath = temp_env->get_temp_filepath("ChunkedNoNullable.parquet");
+
+ cudf::io::table_input_metadata metadata(*table1);
+
+ // In the absence of prescribed per-column nullability in metadata, the writer assumes the worst
+ // and considers all columns nullable. However cudf::concatenate will not force nulls in case no
+ // columns are nullable. To get the expected result, we tell the writer the nullability of all
+ // columns in advance.
+ for (auto& col_meta : metadata.column_metadata) { + col_meta.set_nullability(false); + } + + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}) + .metadata(&metadata); + cudf::io::parquet_chunked_writer(args).write(*table1).write(*table2); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); +} + +TEST_F(ParquetChunkedWriterTest, ForcedNullabilityList) +{ + srand(31337); + + auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); + auto valids2 = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; }); + + using lcw = cudf::test::lists_column_wrapper; + + // COL0 ==================== + // [1, 2, 3] + // [] + // [4, 5] + // NULL + lcw col00{{{1, 2, 3}, {}, {4, 5}, {}}, valids2}; + + // [7] + // [] + // [8, 9, 10, 11] + // NULL + lcw col01{{{7}, {}, {8, 9, 10, 11}, {}}, valids2}; + + // COL1 (non-nested columns to test proper schema construction) + size_t num_rows = static_cast(col00).size(); + auto seq_col0 = random_values(num_rows); + auto seq_col1 = random_values(num_rows); + + column_wrapper col10{seq_col0.begin(), seq_col0.end(), valids}; + column_wrapper col11{seq_col1.begin(), seq_col1.end(), valids2}; + + auto table1 = table_view({col00, col10}); + auto table2 = table_view({col01, col11}); + + auto full_table = cudf::concatenate(std::vector({table1, table2})); + + cudf::io::table_input_metadata metadata(table1); + metadata.column_metadata[0].set_nullability(true); // List is nullable at first (root) level + metadata.column_metadata[0].child(1).set_nullability( + false); // non-nullable at second (leaf) level + metadata.column_metadata[1].set_nullability(true); + + auto filepath = temp_env->get_temp_filepath("ChunkedListNullable.parquet"); + + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}) + .metadata(&metadata); + cudf::io::parquet_chunked_writer(args).write(table1).write(table2); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); +} + +TEST_F(ParquetChunkedWriterTest, ForcedNullabilityStruct) +{ + // Struct, + // age:int + // > (nullable) + // > (non-nullable) + + // Table 1: is_human and struct_2 are non-nullable and should stay that way when read back. 
+ auto weight_1 = cudf::test::fixed_width_column_wrapper{{57.5, 51.1, 15.3}}; + auto ages_1 = cudf::test::fixed_width_column_wrapper{{30, 27, 5}}; + auto struct_1_1 = cudf::test::structs_column_wrapper{weight_1, ages_1}; + auto is_human_1 = cudf::test::fixed_width_column_wrapper{{true, true, false}}; + auto struct_2_1 = cudf::test::structs_column_wrapper{{is_human_1, struct_1_1}}; + auto table_1 = cudf::table_view({struct_2_1}); + + auto weight_2 = cudf::test::fixed_width_column_wrapper{{1.1, -1.0, -1.0}}; + auto ages_2 = cudf::test::fixed_width_column_wrapper{{31, 351, 351}, {1, 1, 0}}; + auto struct_1_2 = cudf::test::structs_column_wrapper{{weight_2, ages_2}, {1, 0, 1}}; + auto is_human_2 = cudf::test::fixed_width_column_wrapper{{false, false, false}}; + auto struct_2_2 = cudf::test::structs_column_wrapper{{is_human_2, struct_1_2}}; + auto table_2 = cudf::table_view({struct_2_2}); + + auto full_table = cudf::concatenate(std::vector({table_1, table_2})); + + cudf::io::table_input_metadata expected_metadata(table_1); + expected_metadata.column_metadata[0].set_name("being").set_nullability(false); + expected_metadata.column_metadata[0].child(0).set_name("human?").set_nullability(false); + expected_metadata.column_metadata[0].child(1).set_name("particulars"); + expected_metadata.column_metadata[0].child(1).child(0).set_name("weight"); + expected_metadata.column_metadata[0].child(1).child(1).set_name("age"); + + auto filepath = temp_env->get_temp_filepath("ChunkedNullableStruct.parquet"); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + args.set_metadata(&expected_metadata); + cudf::io::parquet_chunked_writer(args).write(table_1).write(table_2); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); + cudf::test::expect_metadata_equal(expected_metadata, result.metadata); +} + +TEST_F(ParquetChunkedWriterTest, ReadRowGroups) +{ + srand(31337); + auto table1 = create_random_fixed_table(5, 5, true); + auto table2 = create_random_fixed_table(5, 5, true); + + auto full_table = cudf::concatenate(std::vector({*table2, *table1, *table2})); + + auto filepath = temp_env->get_temp_filepath("ChunkedRowGroups.parquet"); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + { + cudf::io::parquet_chunked_writer(args).write(*table1).write(*table2); + } + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .row_groups({{1, 0, 1}}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); +} + +TEST_F(ParquetChunkedWriterTest, ReadRowGroupsError) +{ + srand(31337); + auto table1 = create_random_fixed_table(5, 5, true); + + auto filepath = temp_env->get_temp_filepath("ChunkedRowGroupsError.parquet"); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer(args).write(*table1); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}).row_groups({{0, 1}}); + EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error); + read_opts.set_row_groups({{-1}}); + 
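  // A negative row group index is out of range for this file, so read_parquet should throw
  // here as well.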
EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error); + read_opts.set_row_groups({{0}, {0}}); + EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error); +} + +TEST_F(ParquetWriterTest, DecimalWrite) +{ + constexpr cudf::size_type num_rows = 500; + auto seq_col0 = random_values(num_rows); + auto seq_col1 = random_values(num_rows); + + auto valids = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; }); + + auto col0 = cudf::test::fixed_point_column_wrapper{ + seq_col0.begin(), seq_col0.end(), valids, numeric::scale_type{5}}; + auto col1 = cudf::test::fixed_point_column_wrapper{ + seq_col1.begin(), seq_col1.end(), valids, numeric::scale_type{-9}}; + + auto table = table_view({col0, col1}); + + auto filepath = temp_env->get_temp_filepath("DecimalWrite.parquet"); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, table); + + cudf::io::table_input_metadata expected_metadata(table); + + // verify failure if too small a precision is given + expected_metadata.column_metadata[0].set_decimal_precision(7); + expected_metadata.column_metadata[1].set_decimal_precision(1); + args.set_metadata(&expected_metadata); + EXPECT_THROW(cudf::io::write_parquet(args), cudf::logic_error); + + // verify success if equal precision is given + expected_metadata.column_metadata[0].set_decimal_precision(7); + expected_metadata.column_metadata[1].set_decimal_precision(9); + args.set_metadata(&expected_metadata); + cudf::io::write_parquet(args); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, table); +} + +TYPED_TEST(ParquetChunkedWriterNumericTypeTest, UnalignedSize) +{ + // write out two 31 row tables and make sure they get + // read back with all their validity bits in the right place + + using T = TypeParam; + + int num_els = 31; + std::vector> cols; + + bool mask[] = {false, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true, + + true, true, true, true, true, true, true, true, true}; + T c1a[num_els]; + std::fill(c1a, c1a + num_els, static_cast(5)); + T c1b[num_els]; + std::fill(c1b, c1b + num_els, static_cast(6)); + column_wrapper c1a_w(c1a, c1a + num_els, mask); + column_wrapper c1b_w(c1b, c1b + num_els, mask); + cols.push_back(c1a_w.release()); + cols.push_back(c1b_w.release()); + cudf::table tbl1(std::move(cols)); + + T c2a[num_els]; + std::fill(c2a, c2a + num_els, static_cast(8)); + T c2b[num_els]; + std::fill(c2b, c2b + num_els, static_cast(9)); + column_wrapper c2a_w(c2a, c2a + num_els, mask); + column_wrapper c2b_w(c2b, c2b + num_els, mask); + cols.push_back(c2a_w.release()); + cols.push_back(c2b_w.release()); + cudf::table tbl2(std::move(cols)); + + auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); + + auto filepath = temp_env->get_temp_filepath("ChunkedUnalignedSize.parquet"); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer(args).write(tbl1).write(tbl2); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); +} + 
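An illustrative aside (editor's sketch, not part of this patch): the 31- and 33-row chunks used above and below are the interesting cases because cudf packs validity bits into 32-bit bitmask words, so the second chunk's validity bits begin mid-word instead of on a word boundary. The small standalone helper below (hypothetical, for illustration only) makes that mapping concrete:

    #include <cstdint>
    #include <cstdio>

    // hypothetical helper: report which 32-bit bitmask word and bit a row's validity flag occupies
    void locate_validity_bit(std::int64_t row)
    {
      std::int64_t word = row / 32;                    // index of the 32-bit bitmask word
      int bit           = static_cast<int>(row % 32);  // bit position within that word
      std::printf("row %lld -> word %lld, bit %d\n",
                  static_cast<long long>(row),
                  static_cast<long long>(word),
                  bit);
    }

    int main()
    {
      locate_validity_bit(30);  // last row of a 31-row first chunk: word 0, bit 30
      locate_validity_bit(31);  // first row of the second chunk: still word 0, bit 31
      locate_validity_bit(32);  // next row: word 1, bit 0
      return 0;
    }

If the chunked writer or the reader mishandled that mid-word carry-over, the validity bits of the second table would land in the wrong positions, which is exactly what these tests guard against.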
+TYPED_TEST(ParquetChunkedWriterNumericTypeTest, UnalignedSize2) +{ + // write out two 33 row tables and make sure they get + // read back with all their validity bits in the right place + + using T = TypeParam; + + int num_els = 33; + std::vector> cols; + + bool mask[] = {false, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true}; + + T c1a[num_els]; + std::fill(c1a, c1a + num_els, static_cast(5)); + T c1b[num_els]; + std::fill(c1b, c1b + num_els, static_cast(6)); + column_wrapper c1a_w(c1a, c1a + num_els, mask); + column_wrapper c1b_w(c1b, c1b + num_els, mask); + cols.push_back(c1a_w.release()); + cols.push_back(c1b_w.release()); + cudf::table tbl1(std::move(cols)); + + T c2a[num_els]; + std::fill(c2a, c2a + num_els, static_cast(8)); + T c2b[num_els]; + std::fill(c2b, c2b + num_els, static_cast(9)); + column_wrapper c2a_w(c2a, c2a + num_els, mask); + column_wrapper c2b_w(c2b, c2b + num_els, mask); + cols.push_back(c2a_w.release()); + cols.push_back(c2b_w.release()); + cudf::table tbl2(std::move(cols)); + + auto expected = cudf::concatenate(std::vector({tbl1, tbl2})); + + auto filepath = temp_env->get_temp_filepath("ChunkedUnalignedSize2.parquet"); + cudf::io::chunked_parquet_writer_options args = + cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath}); + cudf::io::parquet_chunked_writer(args).write(tbl1).write(tbl2); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected); +} + +// custom mem mapped data sink that supports device writes +template +class custom_test_memmap_sink : public cudf::io::data_sink { + public: + explicit custom_test_memmap_sink(std::vector* mm_writer_buf) + { + mm_writer = cudf::io::data_sink::create(mm_writer_buf); + } + + virtual ~custom_test_memmap_sink() { mm_writer->flush(); } + + void host_write(void const* data, size_t size) override { mm_writer->host_write(data, size); } + + [[nodiscard]] bool supports_device_write() const override { return supports_device_writes; } + + void device_write(void const* gpu_data, size_t size, rmm::cuda_stream_view stream) override + { + this->device_write_async(gpu_data, size, stream).get(); + } + + std::future device_write_async(void const* gpu_data, + size_t size, + rmm::cuda_stream_view stream) override + { + return std::async(std::launch::deferred, [=] { + char* ptr = nullptr; + CUDF_CUDA_TRY(cudaMallocHost(&ptr, size)); + CUDF_CUDA_TRY(cudaMemcpyAsync(ptr, gpu_data, size, cudaMemcpyDeviceToHost, stream.value())); + stream.synchronize(); + mm_writer->host_write(ptr, size); + CUDF_CUDA_TRY(cudaFreeHost(ptr)); + }); + } + + void flush() override { mm_writer->flush(); } + + size_t bytes_written() override { return mm_writer->bytes_written(); } + + private: + std::unique_ptr mm_writer; +}; + +TEST_F(ParquetWriterStressTest, LargeTableWeakCompression) +{ + std::vector mm_buf; + mm_buf.reserve(4 * 1024 * 1024 * 16); + custom_test_memmap_sink custom_sink(&mm_buf); + + // exercises multiple rowgroups + srand(31337); + auto expected = create_random_fixed_table(16, 4 * 1024 * 1024, false); + + // write out using the custom sink (which uses device writes) + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); + 
cudf::io::write_parquet(args); + + cudf::io::parquet_reader_options custom_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()}); + auto custom_tbl = cudf::io::read_parquet(custom_args); + CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); +} + +TEST_F(ParquetWriterStressTest, LargeTableGoodCompression) +{ + std::vector mm_buf; + mm_buf.reserve(4 * 1024 * 1024 * 16); + custom_test_memmap_sink custom_sink(&mm_buf); + + // exercises multiple rowgroups + srand(31337); + auto expected = create_compressible_fixed_table(16, 4 * 1024 * 1024, 128 * 1024, false); + + // write out using the custom sink (which uses device writes) + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); + cudf::io::write_parquet(args); + + cudf::io::parquet_reader_options custom_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()}); + auto custom_tbl = cudf::io::read_parquet(custom_args); + CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); +} + +TEST_F(ParquetWriterStressTest, LargeTableWithValids) +{ + std::vector mm_buf; + mm_buf.reserve(4 * 1024 * 1024 * 16); + custom_test_memmap_sink custom_sink(&mm_buf); + + // exercises multiple rowgroups + srand(31337); + auto expected = create_compressible_fixed_table(16, 4 * 1024 * 1024, 6, true); + + // write out using the custom sink (which uses device writes) + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); + cudf::io::write_parquet(args); + + cudf::io::parquet_reader_options custom_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()}); + auto custom_tbl = cudf::io::read_parquet(custom_args); + CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); +} + +TEST_F(ParquetWriterStressTest, DeviceWriteLargeTableWeakCompression) +{ + std::vector mm_buf; + mm_buf.reserve(4 * 1024 * 1024 * 16); + custom_test_memmap_sink custom_sink(&mm_buf); + + // exercises multiple rowgroups + srand(31337); + auto expected = create_random_fixed_table(16, 4 * 1024 * 1024, false); + + // write out using the custom sink (which uses device writes) + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); + cudf::io::write_parquet(args); + + cudf::io::parquet_reader_options custom_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()}); + auto custom_tbl = cudf::io::read_parquet(custom_args); + CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); +} + +TEST_F(ParquetWriterStressTest, DeviceWriteLargeTableGoodCompression) +{ + std::vector mm_buf; + mm_buf.reserve(4 * 1024 * 1024 * 16); + custom_test_memmap_sink custom_sink(&mm_buf); + + // exercises multiple rowgroups + srand(31337); + auto expected = create_compressible_fixed_table(16, 4 * 1024 * 1024, 128 * 1024, false); + + // write out using the custom sink (which uses device writes) + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); + cudf::io::write_parquet(args); + + cudf::io::parquet_reader_options custom_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()}); + auto custom_tbl = 
cudf::io::read_parquet(custom_args); + CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); +} + +TEST_F(ParquetWriterStressTest, DeviceWriteLargeTableWithValids) +{ + std::vector mm_buf; + mm_buf.reserve(4 * 1024 * 1024 * 16); + custom_test_memmap_sink custom_sink(&mm_buf); + + // exercises multiple rowgroups + srand(31337); + auto expected = create_compressible_fixed_table(16, 4 * 1024 * 1024, 6, true); + + // write out using the custom sink (which uses device writes) + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected); + cudf::io::write_parquet(args); + + cudf::io::parquet_reader_options custom_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()}); + auto custom_tbl = cudf::io::read_parquet(custom_args); + CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view()); +} + +TEST_F(ParquetReaderTest, UserBounds) +{ + // trying to read more rows than there are should result in + // receiving the properly capped # of rows + { + srand(31337); + auto expected = create_random_fixed_table(4, 4, false); + + auto filepath = temp_env->get_temp_filepath("TooManyRows.parquet"); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected); + cudf::io::write_parquet(args); + + // attempt to read more rows than there actually are + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}).num_rows(16); + auto result = cudf::io::read_parquet(read_opts); + + // we should only get back 4 rows + EXPECT_EQ(result.tbl->view().column(0).size(), 4); + } + + // trying to read past the end of the # of actual rows should result + // in empty columns. + { + srand(31337); + auto expected = create_random_fixed_table(4, 4, false); + + auto filepath = temp_env->get_temp_filepath("PastBounds.parquet"); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected); + cudf::io::write_parquet(args); + + // attempt to read more rows than there actually are + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}).skip_rows(4); + auto result = cudf::io::read_parquet(read_opts); + + // we should get empty columns back + EXPECT_EQ(result.tbl->view().num_columns(), 4); + EXPECT_EQ(result.tbl->view().column(0).size(), 0); + } + + // trying to read 0 rows should result in reading the whole file + // at the moment we get back 4. when that bug gets fixed, this + // test can be flipped. + { + srand(31337); + auto expected = create_random_fixed_table(4, 4, false); + + auto filepath = temp_env->get_temp_filepath("ZeroRows.parquet"); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected); + cudf::io::write_parquet(args); + + // attempt to read more rows than there actually are + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}).num_rows(0); + auto result = cudf::io::read_parquet(read_opts); + + EXPECT_EQ(result.tbl->view().num_columns(), 4); + EXPECT_EQ(result.tbl->view().column(0).size(), 0); + } + + // trying to read 0 rows past the end of the # of actual rows should result + // in empty columns. 
+ { + srand(31337); + auto expected = create_random_fixed_table(4, 4, false); + + auto filepath = temp_env->get_temp_filepath("ZeroRowsPastBounds.parquet"); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected); + cudf::io::write_parquet(args); + + // attempt to read more rows than there actually are + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .skip_rows(4) + .num_rows(0); + auto result = cudf::io::read_parquet(read_opts); + + // we should get empty columns back + EXPECT_EQ(result.tbl->view().num_columns(), 4); + EXPECT_EQ(result.tbl->view().column(0).size(), 0); + } +} + +TEST_F(ParquetReaderTest, UserBoundsWithNulls) +{ + // clang-format off + cudf::test::fixed_width_column_wrapper col{{1,1,1,1,1,1,1,1, 2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4, 5,5,5,5,5,5,5,5, 6,6,6,6,6,6,6,6, 7,7,7,7,7,7,7,7, 8,8,8,8,8,8,8,8} + ,{1,1,1,0,0,0,1,1, 1,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,0, 1,1,1,1,1,1,0,0, 1,0,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,0}}; + // clang-format on + cudf::table_view tbl({col}); + auto filepath = temp_env->get_temp_filepath("UserBoundsWithNulls.parquet"); + cudf::io::parquet_writer_options out_args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl); + cudf::io::write_parquet(out_args); + + // skip_rows / num_rows + // clang-format off + std::vector> params{ {-1, -1}, {1, 3}, {3, -1}, + {31, -1}, {32, -1}, {33, -1}, + {31, 5}, {32, 5}, {33, 5}, + {-1, 7}, {-1, 31}, {-1, 32}, {-1, 33}, + {62, -1}, {63, -1}, + {62, 2}, {63, 1}}; + // clang-format on + for (auto p : params) { + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + if (p.first >= 0) { read_args.set_skip_rows(p.first); } + if (p.second >= 0) { read_args.set_num_rows(p.second); } + auto result = cudf::io::read_parquet(read_args); + + p.first = p.first < 0 ? 0 : p.first; + p.second = p.second < 0 ? 
static_cast(col).size() - p.first : p.second; + std::vector slice_indices{p.first, p.first + p.second}; + auto expected = cudf::slice(col, slice_indices); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(0), expected[0]); + } +} + +TEST_F(ParquetReaderTest, UserBoundsWithNullsMixedTypes) +{ + constexpr int num_rows = 32 * 1024; + + std::mt19937 gen(6542); + std::bernoulli_distribution bn(0.7f); + auto valids = + cudf::detail::make_counting_transform_iterator(0, [&](int index) { return bn(gen); }); + auto values = thrust::make_counting_iterator(0); + + // int64 + cudf::test::fixed_width_column_wrapper c0(values, values + num_rows, valids); + + // list + constexpr int floats_per_row = 4; + auto c1_offset_iter = cudf::detail::make_counting_transform_iterator( + 0, [floats_per_row](cudf::size_type idx) { return idx * floats_per_row; }); + cudf::test::fixed_width_column_wrapper c1_offsets( + c1_offset_iter, c1_offset_iter + num_rows + 1); + cudf::test::fixed_width_column_wrapper c1_floats( + values, values + (num_rows * floats_per_row), valids); + auto _c1 = cudf::make_lists_column(num_rows, + c1_offsets.release(), + c1_floats.release(), + cudf::UNKNOWN_NULL_COUNT, + cudf::test::detail::make_null_mask(valids, valids + num_rows)); + auto c1 = cudf::purge_nonempty_nulls(static_cast(*_c1)); + + // list> + auto c2 = make_parquet_list_list_col(0, num_rows, 5, 8, true); + + // struct, int, float> + std::vector strings{ + "abc", "x", "bananas", "gpu", "minty", "backspace", "", "cayenne", "turbine", "soft"}; + std::uniform_int_distribution uni(0, strings.size() - 1); + auto string_iter = cudf::detail::make_counting_transform_iterator( + 0, [&](cudf::size_type idx) { return strings[uni(gen)]; }); + constexpr int string_per_row = 3; + constexpr int num_string_rows = num_rows * string_per_row; + cudf::test::strings_column_wrapper string_col{string_iter, string_iter + num_string_rows}; + auto offset_iter = cudf::detail::make_counting_transform_iterator( + 0, [string_per_row](cudf::size_type idx) { return idx * string_per_row; }); + cudf::test::fixed_width_column_wrapper offsets(offset_iter, + offset_iter + num_rows + 1); + + auto _c3_valids = + cudf::detail::make_counting_transform_iterator(0, [&](int index) { return index % 200; }); + std::vector c3_valids(num_rows); + std::copy(_c3_valids, _c3_valids + num_rows, c3_valids.begin()); + auto _c3_list = + cudf::make_lists_column(num_rows, + offsets.release(), + string_col.release(), + cudf::UNKNOWN_NULL_COUNT, + cudf::test::detail::make_null_mask(valids, valids + num_rows)); + auto c3_list = cudf::purge_nonempty_nulls(static_cast(*_c3_list)); + cudf::test::fixed_width_column_wrapper c3_ints(values, values + num_rows, valids); + cudf::test::fixed_width_column_wrapper c3_floats(values, values + num_rows, valids); + std::vector> c3_children; + c3_children.push_back(std::move(c3_list)); + c3_children.push_back(c3_ints.release()); + c3_children.push_back(c3_floats.release()); + cudf::test::structs_column_wrapper _c3(std::move(c3_children), c3_valids); + auto c3 = cudf::purge_nonempty_nulls(static_cast(_c3)); + + // write it out + cudf::table_view tbl({c0, *c1, *c2, *c3}); + auto filepath = temp_env->get_temp_filepath("UserBoundsWithNullsMixedTypes.parquet"); + cudf::io::parquet_writer_options out_args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl); + cudf::io::write_parquet(out_args); + + // read it back + std::vector> params{ + {-1, -1}, {0, num_rows}, {1, num_rows - 1}, {num_rows - 1, 1}, {517, 22000}}; + for (auto p 
: params) { + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + if (p.first >= 0) { read_args.set_skip_rows(p.first); } + if (p.second >= 0) { read_args.set_num_rows(p.second); } + auto result = cudf::io::read_parquet(read_args); + + p.first = p.first < 0 ? 0 : p.first; + p.second = p.second < 0 ? num_rows - p.first : p.second; + std::vector slice_indices{p.first, p.first + p.second}; + auto expected = cudf::slice(tbl, slice_indices); + + CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, expected[0]); + } +} + +TEST_F(ParquetReaderTest, UserBoundsWithNullsLarge) +{ + constexpr int num_rows = 30 * 1000000; + + std::mt19937 gen(6747); + std::bernoulli_distribution bn(0.7f); + auto valids = + cudf::detail::make_counting_transform_iterator(0, [&](int index) { return bn(gen); }); + auto values = thrust::make_counting_iterator(0); + + cudf::test::fixed_width_column_wrapper col(values, values + num_rows, valids); + + // this file will have row groups of 1,000,000 each + cudf::table_view tbl({col}); + auto filepath = temp_env->get_temp_filepath("UserBoundsWithNullsLarge.parquet"); + cudf::io::parquet_writer_options out_args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl); + cudf::io::write_parquet(out_args); + + // skip_rows / num_rows + // clang-format off + std::vector> params{ {-1, -1}, {31, -1}, {32, -1}, {33, -1}, {1613470, -1}, {1999999, -1}, + {31, 1}, {32, 1}, {33, 1}, + // deliberately span some row group boundaries + {999000, 1001}, {999000, 2000}, {2999999, 2}, {13999997, -1}, + {16785678, 3}, {22996176, 31}, + {24001231, 17}, {29000001, 989999}, {29999999, 1} }; + // clang-format on + for (auto p : params) { + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + if (p.first >= 0) { read_args.set_skip_rows(p.first); } + if (p.second >= 0) { read_args.set_num_rows(p.second); } + auto result = cudf::io::read_parquet(read_args); + + p.first = p.first < 0 ? 0 : p.first; + p.second = p.second < 0 ? 
static_cast(col).size() - p.first : p.second; + std::vector slice_indices{p.first, p.first + p.second}; + auto expected = cudf::slice(col, slice_indices); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(0), expected[0]); + } +} + +TEST_F(ParquetReaderTest, ListUserBoundsWithNullsLarge) +{ + constexpr int num_rows = 5 * 1000000; + auto colp = make_parquet_list_list_col(0, num_rows, 5, 8, true); + cudf::column_view col = *colp; + + // this file will have row groups of 1,000,000 each + cudf::table_view tbl({col}); + auto filepath = temp_env->get_temp_filepath("ListUserBoundsWithNullsLarge.parquet"); + cudf::io::parquet_writer_options out_args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl); + cudf::io::write_parquet(out_args); + + // skip_rows / num_rows + // clang-format off + std::vector> params{ {-1, -1}, {31, -1}, {32, -1}, {33, -1}, {161470, -1}, {4499997, -1}, + {31, 1}, {32, 1}, {33, 1}, + // deliberately span some row group boundaries + {999000, 1001}, {999000, 2000}, {2999999, 2}, + {1678567, 3}, {4299676, 31}, + {4001231, 17}, {1900000, 989999}, {4999999, 1} }; + // clang-format on + for (auto p : params) { + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + if (p.first >= 0) { read_args.set_skip_rows(p.first); } + if (p.second >= 0) { read_args.set_num_rows(p.second); } + auto result = cudf::io::read_parquet(read_args); + + p.first = p.first < 0 ? 0 : p.first; + p.second = p.second < 0 ? static_cast(col).size() - p.first : p.second; + std::vector slice_indices{p.first, p.first + p.second}; + auto expected = cudf::slice(col, slice_indices); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(0), expected[0]); + } +} + +TEST_F(ParquetReaderTest, ReorderedColumns) +{ + { + auto a = cudf::test::strings_column_wrapper{{"a", "", "c"}, {true, false, true}}; + auto b = cudf::test::fixed_width_column_wrapper{1, 2, 3}; + + cudf::table_view tbl{{a, b}}; + auto filepath = temp_env->get_temp_filepath("ReorderedColumns.parquet"); + cudf::io::table_input_metadata md(tbl); + md.column_metadata[0].set_name("a"); + md.column_metadata[1].set_name("b"); + cudf::io::parquet_writer_options opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl).metadata(&md); + cudf::io::write_parquet(opts); + + // read them out of order + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .columns({"b", "a"}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), b); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), a); + } + + { + auto a = cudf::test::fixed_width_column_wrapper{1, 2, 3}; + auto b = cudf::test::strings_column_wrapper{{"a", "", "c"}, {true, false, true}}; + + cudf::table_view tbl{{a, b}}; + auto filepath = temp_env->get_temp_filepath("ReorderedColumns2.parquet"); + cudf::io::table_input_metadata md(tbl); + md.column_metadata[0].set_name("a"); + md.column_metadata[1].set_name("b"); + cudf::io::parquet_writer_options opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl).metadata(&md); + cudf::io::write_parquet(opts); + + // read them out of order + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .columns({"b", "a"}); + auto result = cudf::io::read_parquet(read_opts); + + 
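    // Columns should come back in the order they were requested ("b" first, then "a"),
    // not in the order they were written to the file.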
CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), b); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), a); + } + + auto a = cudf::test::fixed_width_column_wrapper{1, 2, 3, 10, 20, 30}; + auto b = cudf::test::strings_column_wrapper{{"a", "", "c", "cats", "dogs", "owls"}, + {true, false, true, true, false, true}}; + auto c = cudf::test::fixed_width_column_wrapper{{15, 16, 17, 25, 26, 32}, + {false, true, true, true, true, false}}; + auto d = cudf::test::strings_column_wrapper{"ducks", "sheep", "cows", "fish", "birds", "ants"}; + + cudf::table_view tbl{{a, b, c, d}}; + auto filepath = temp_env->get_temp_filepath("ReorderedColumns3.parquet"); + cudf::io::table_input_metadata md(tbl); + md.column_metadata[0].set_name("a"); + md.column_metadata[1].set_name("b"); + md.column_metadata[2].set_name("c"); + md.column_metadata[3].set_name("d"); + cudf::io::parquet_writer_options opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl).metadata(&md); + cudf::io::write_parquet(opts); + + { + // read them out of order + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .columns({"d", "a", "b", "c"}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), d); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), a); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(2), b); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(3), c); + } + + { + // read them out of order + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .columns({"c", "d", "a", "b"}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), c); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), d); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(2), a); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(3), b); + } + + { + // read them out of order + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .columns({"d", "c", "b", "a"}); + auto result = cudf::io::read_parquet(read_opts); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), d); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), c); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(2), b); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(3), a); + } +} + +TEST_F(ParquetReaderTest, SelectNestedColumn) +{ + // Struct>, + // flats:List> + // > + // > + + auto weights_col = cudf::test::fixed_width_column_wrapper{1.1, 2.4, 5.3, 8.0, 9.6, 6.9}; + + auto ages_col = + cudf::test::fixed_width_column_wrapper{{48, 27, 25, 31, 351, 351}, {1, 1, 1, 1, 1, 0}}; + + auto struct_1 = cudf::test::structs_column_wrapper{{weights_col, ages_col}, {1, 1, 1, 1, 0, 1}}; + + auto is_human_col = cudf::test::fixed_width_column_wrapper{ + {true, true, false, false, false, false}, {1, 1, 0, 1, 1, 0}}; + + auto struct_2 = + cudf::test::structs_column_wrapper{{is_human_col, struct_1}, {0, 1, 1, 1, 1, 1}}.release(); + + auto input = table_view({*struct_2}); + + cudf::io::table_input_metadata input_metadata(input); + input_metadata.column_metadata[0].set_name("being"); + input_metadata.column_metadata[0].child(0).set_name("human?"); + input_metadata.column_metadata[0].child(1).set_name("particulars"); + 
input_metadata.column_metadata[0].child(1).child(0).set_name("weight"); + input_metadata.column_metadata[0].child(1).child(1).set_name("age"); + + auto filepath = temp_env->get_temp_filepath("SelectNestedColumn.parquet"); + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, input) + .metadata(&input_metadata); + cudf::io::write_parquet(args); + + { // Test selecting a single leaf from the table + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath)) + .columns({"being.particulars.age"}); + const auto result = cudf::io::read_parquet(read_args); + + auto expect_ages_col = cudf::test::fixed_width_column_wrapper{ + {48, 27, 25, 31, 351, 351}, {1, 1, 1, 1, 1, 0}}; + auto expect_s_1 = cudf::test::structs_column_wrapper{{expect_ages_col}, {1, 1, 1, 1, 0, 1}}; + auto expect_s_2 = + cudf::test::structs_column_wrapper{{expect_s_1}, {0, 1, 1, 1, 1, 1}}.release(); + auto expected = table_view({*expect_s_2}); + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_name("being"); + expected_metadata.column_metadata[0].child(0).set_name("particulars"); + expected_metadata.column_metadata[0].child(0).child(0).set_name("age"); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); + cudf::test::expect_metadata_equal(expected_metadata, result.metadata); + } + + { // Test selecting a non-leaf and expecting all hierarchy from that node onwards + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath)) + .columns({"being.particulars"}); + const auto result = cudf::io::read_parquet(read_args); + + auto expected_weights_col = + cudf::test::fixed_width_column_wrapper{1.1, 2.4, 5.3, 8.0, 9.6, 6.9}; + + auto expected_ages_col = cudf::test::fixed_width_column_wrapper{ + {48, 27, 25, 31, 351, 351}, {1, 1, 1, 1, 1, 0}}; + + auto expected_s_1 = cudf::test::structs_column_wrapper{ + {expected_weights_col, expected_ages_col}, {1, 1, 1, 1, 0, 1}}; + + auto expect_s_2 = + cudf::test::structs_column_wrapper{{expected_s_1}, {0, 1, 1, 1, 1, 1}}.release(); + auto expected = table_view({*expect_s_2}); + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_name("being"); + expected_metadata.column_metadata[0].child(0).set_name("particulars"); + expected_metadata.column_metadata[0].child(0).child(0).set_name("weight"); + expected_metadata.column_metadata[0].child(0).child(1).set_name("age"); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); + cudf::test::expect_metadata_equal(expected_metadata, result.metadata); + } + + { // Test selecting struct children out of order + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath)) + .columns({"being.particulars.age", "being.particulars.weight", "being.human?"}); + const auto result = cudf::io::read_parquet(read_args); + + auto expected_weights_col = + cudf::test::fixed_width_column_wrapper{1.1, 2.4, 5.3, 8.0, 9.6, 6.9}; + + auto expected_ages_col = cudf::test::fixed_width_column_wrapper{ + {48, 27, 25, 31, 351, 351}, {1, 1, 1, 1, 1, 0}}; + + auto expected_is_human_col = cudf::test::fixed_width_column_wrapper{ + {true, true, false, false, false, false}, {1, 1, 0, 1, 1, 0}}; + + auto expect_s_1 = cudf::test::structs_column_wrapper{{expected_ages_col, expected_weights_col}, + {1, 1, 1, 1, 0, 1}}; + + auto 
expect_s_2 = + cudf::test::structs_column_wrapper{{expect_s_1, expected_is_human_col}, {0, 1, 1, 1, 1, 1}} + .release(); + + auto expected = table_view({*expect_s_2}); + + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[0].set_name("being"); + expected_metadata.column_metadata[0].child(0).set_name("particulars"); + expected_metadata.column_metadata[0].child(0).child(0).set_name("age"); + expected_metadata.column_metadata[0].child(0).child(1).set_name("weight"); + expected_metadata.column_metadata[0].child(1).set_name("human?"); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); + cudf::test::expect_metadata_equal(expected_metadata, result.metadata); + } +} + +TEST_F(ParquetReaderTest, DecimalRead) +{ + { + /* We could add a dataset to include this file, but we don't want tests in cudf to have data. + This test is a temporary test until python gains the ability to write decimal, so we're + embedding + a parquet file directly into the code here to prevent issues with finding the file */ + const unsigned char decimals_parquet[] = { + 0x50, 0x41, 0x52, 0x31, 0x15, 0x00, 0x15, 0xb0, 0x03, 0x15, 0xb8, 0x03, 0x2c, 0x15, 0x6a, + 0x15, 0x00, 0x15, 0x06, 0x15, 0x08, 0x1c, 0x36, 0x02, 0x28, 0x04, 0x7f, 0x96, 0x98, 0x00, + 0x18, 0x04, 0x81, 0x69, 0x67, 0xff, 0x00, 0x00, 0x00, 0xd8, 0x01, 0xf0, 0xd7, 0x04, 0x00, + 0x00, 0x00, 0x64, 0x01, 0x03, 0x06, 0x68, 0x12, 0xdc, 0xff, 0xbd, 0x18, 0xfd, 0xff, 0x64, + 0x13, 0x80, 0x00, 0xb3, 0x5d, 0x62, 0x00, 0x90, 0x35, 0xa9, 0xff, 0xa2, 0xde, 0xe3, 0xff, + 0xe9, 0xbf, 0x96, 0xff, 0x1f, 0x8a, 0x98, 0xff, 0xb1, 0x50, 0x34, 0x00, 0x88, 0x24, 0x59, + 0x00, 0x2a, 0x33, 0xbe, 0xff, 0xd5, 0x16, 0xbc, 0xff, 0x13, 0x50, 0x8d, 0xff, 0xcb, 0x63, + 0x2d, 0x00, 0x80, 0x8f, 0xbe, 0xff, 0x82, 0x40, 0x10, 0x00, 0x84, 0x68, 0x70, 0xff, 0x9b, + 0x69, 0x78, 0x00, 0x14, 0x6c, 0x10, 0x00, 0x50, 0xd9, 0xe1, 0xff, 0xaa, 0xcd, 0x6a, 0x00, + 0xcf, 0xb1, 0x28, 0x00, 0x77, 0x57, 0x8d, 0x00, 0xee, 0x05, 0x79, 0x00, 0xf0, 0x15, 0xeb, + 0xff, 0x02, 0xe2, 0x06, 0x00, 0x87, 0x43, 0x86, 0x00, 0xf8, 0x2d, 0x2e, 0x00, 0xee, 0x2e, + 0x98, 0xff, 0x39, 0xcb, 0x4d, 0x00, 0x1e, 0x6b, 0xea, 0xff, 0x80, 0x8e, 0x6c, 0xff, 0x97, + 0x25, 0x26, 0x00, 0x4d, 0x0d, 0x0a, 0x00, 0xca, 0x64, 0x7f, 0x00, 0xf4, 0xbe, 0xa1, 0xff, + 0xe2, 0x12, 0x6c, 0xff, 0xbd, 0x77, 0xae, 0xff, 0xf9, 0x4b, 0x36, 0x00, 0xb0, 0xe3, 0x79, + 0xff, 0xa2, 0x2a, 0x29, 0x00, 0xcd, 0x06, 0xbc, 0xff, 0x2d, 0xa3, 0x7e, 0x00, 0xa9, 0x08, + 0xa1, 0xff, 0xbf, 0x81, 0xd0, 0xff, 0x4f, 0x03, 0x73, 0x00, 0xb0, 0x99, 0x0c, 0x00, 0xbd, + 0x6f, 0xf8, 0xff, 0x6b, 0x02, 0x05, 0x00, 0xc1, 0xe1, 0xba, 0xff, 0x81, 0x69, 0x67, 0xff, + 0x7f, 0x96, 0x98, 0x00, 0x15, 0x00, 0x15, 0xd0, 0x06, 0x15, 0xda, 0x06, 0x2c, 0x15, 0x6a, + 0x15, 0x00, 0x15, 0x06, 0x15, 0x08, 0x1c, 0x36, 0x02, 0x28, 0x08, 0xff, 0x3f, 0x7a, 0x10, + 0xf3, 0x5a, 0x00, 0x00, 0x18, 0x08, 0x01, 0xc0, 0x85, 0xef, 0x0c, 0xa5, 0xff, 0xff, 0x00, + 0x00, 0x00, 0xa8, 0x03, 0xf4, 0xa7, 0x01, 0x04, 0x00, 0x00, 0x00, 0x64, 0x01, 0x03, 0x06, + 0x55, 0x6f, 0xc5, 0xe4, 0x9f, 0x1a, 0x00, 0x00, 0x47, 0x89, 0x0a, 0xe8, 0x58, 0xf0, 0xff, + 0xff, 0x63, 0xee, 0x21, 0xdd, 0xdd, 0xca, 0xff, 0xff, 0xbe, 0x6f, 0x3b, 0xaa, 0xe9, 0x3d, + 0x00, 0x00, 0xd6, 0x91, 0x2a, 0xb7, 0x08, 0x02, 0x00, 0x00, 0x75, 0x45, 0x2c, 0xd7, 0x76, + 0x0c, 0x00, 0x00, 0x54, 0x49, 0x92, 0x44, 0x9c, 0xbf, 0xff, 0xff, 0x41, 0xa9, 0x6d, 0xec, + 0x7a, 0xd0, 0xff, 0xff, 0x27, 0xa0, 0x23, 0x41, 0x44, 0xc1, 0xff, 0xff, 0x18, 0xd4, 0xe1, + 0x30, 0xd3, 0xe0, 0xff, 0xff, 0x59, 0xac, 0x14, 0xf4, 
0xec, 0x58, 0x00, 0x00, 0x2c, 0x17, + 0x29, 0x57, 0x44, 0x13, 0x00, 0x00, 0xa2, 0x0d, 0x4a, 0xcc, 0x63, 0xff, 0xff, 0xff, 0x81, + 0x33, 0xbc, 0xda, 0xd5, 0xda, 0xff, 0xff, 0x4c, 0x05, 0xf4, 0x78, 0x19, 0xea, 0xff, 0xff, + 0x06, 0x71, 0x25, 0xde, 0x5a, 0xaf, 0xff, 0xff, 0x95, 0x32, 0x5f, 0x76, 0x98, 0xb3, 0xff, + 0xff, 0xf1, 0x34, 0x3c, 0xbf, 0xa8, 0xbe, 0xff, 0xff, 0x27, 0x73, 0x40, 0x0c, 0x7d, 0xcd, + 0xff, 0xff, 0x68, 0xa9, 0xc2, 0xe9, 0x2c, 0x03, 0x00, 0x00, 0x3f, 0x79, 0xd9, 0x04, 0x8c, + 0xe5, 0xff, 0xff, 0x91, 0xb4, 0x9b, 0xe3, 0x8f, 0x21, 0x00, 0x00, 0xb8, 0x20, 0xc8, 0xc2, + 0x4d, 0xa6, 0xff, 0xff, 0x47, 0xfa, 0xde, 0x36, 0x4a, 0xf3, 0xff, 0xff, 0x72, 0x80, 0x94, + 0x59, 0xdd, 0x4e, 0x00, 0x00, 0x29, 0xe4, 0xd6, 0x43, 0xb0, 0xf0, 0xff, 0xff, 0x68, 0x36, + 0xbc, 0x2d, 0xd1, 0xa9, 0xff, 0xff, 0xbc, 0xe4, 0xbe, 0xd7, 0xed, 0x1b, 0x00, 0x00, 0x02, + 0x8b, 0xcb, 0xd7, 0xed, 0x47, 0x00, 0x00, 0x3c, 0x06, 0xe4, 0xda, 0xc7, 0x47, 0x00, 0x00, + 0xf3, 0x39, 0x55, 0x28, 0x97, 0xba, 0xff, 0xff, 0x07, 0x79, 0x38, 0x4e, 0xe0, 0x21, 0x00, + 0x00, 0xde, 0xed, 0x1c, 0x23, 0x09, 0x49, 0x00, 0x00, 0x49, 0x46, 0x49, 0x5d, 0x8f, 0x34, + 0x00, 0x00, 0x38, 0x18, 0x50, 0xf6, 0xa1, 0x11, 0x00, 0x00, 0xdf, 0xb8, 0x19, 0x14, 0xd1, + 0xe1, 0xff, 0xff, 0x2c, 0x56, 0x72, 0x93, 0x64, 0x3f, 0x00, 0x00, 0x1c, 0xe0, 0xbe, 0x87, + 0x7d, 0xf9, 0xff, 0xff, 0x73, 0x0e, 0x3c, 0x01, 0x91, 0xf9, 0xff, 0xff, 0xb2, 0x37, 0x85, + 0x81, 0x5f, 0x54, 0x00, 0x00, 0x58, 0x44, 0xb0, 0x1a, 0xac, 0xbb, 0xff, 0xff, 0x36, 0xbf, + 0xbe, 0x5e, 0x22, 0xff, 0xff, 0xff, 0x06, 0x20, 0xa0, 0x23, 0x0d, 0x3b, 0x00, 0x00, 0x19, + 0xc6, 0x49, 0x0a, 0x00, 0xcf, 0xff, 0xff, 0x4f, 0xcd, 0xc6, 0x95, 0x4b, 0xf1, 0xff, 0xff, + 0xa3, 0x59, 0xaf, 0x65, 0xec, 0xe9, 0xff, 0xff, 0x58, 0xef, 0x05, 0x50, 0x63, 0xe4, 0xff, + 0xff, 0xc7, 0x6a, 0x9e, 0xf1, 0x69, 0x20, 0x00, 0x00, 0xd1, 0xb3, 0xc9, 0x14, 0xb2, 0x29, + 0x00, 0x00, 0x1d, 0x48, 0x16, 0x70, 0xf0, 0x40, 0x00, 0x00, 0x01, 0xc0, 0x85, 0xef, 0x0c, + 0xa5, 0xff, 0xff, 0xff, 0x3f, 0x7a, 0x10, 0xf3, 0x5a, 0x00, 0x00, 0x15, 0x00, 0x15, 0x90, + 0x0d, 0x15, 0x9a, 0x0d, 0x2c, 0x15, 0x6a, 0x15, 0x00, 0x15, 0x06, 0x15, 0x08, 0x1c, 0x36, + 0x02, 0x28, 0x10, 0x4b, 0x3b, 0x4c, 0xa8, 0x5a, 0x86, 0xc4, 0x7a, 0x09, 0x8a, 0x22, 0x3f, + 0xff, 0xff, 0xff, 0xff, 0x18, 0x10, 0xb4, 0xc4, 0xb3, 0x57, 0xa5, 0x79, 0x3b, 0x85, 0xf6, + 0x75, 0xdd, 0xc0, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xc8, 0x06, 0xf4, 0x47, 0x03, + 0x04, 0x00, 0x00, 0x00, 0x64, 0x01, 0x03, 0x06, 0x05, 0x49, 0xf7, 0xfc, 0x89, 0x3d, 0x3e, + 0x20, 0x07, 0x72, 0x3e, 0xa1, 0x66, 0x81, 0x67, 0x80, 0x23, 0x78, 0x06, 0x68, 0x0e, 0x78, + 0xf5, 0x08, 0xed, 0x20, 0xcd, 0x0e, 0x7f, 0x9c, 0x70, 0xa0, 0xb9, 0x16, 0x44, 0xb2, 0x41, + 0x62, 0xba, 0x82, 0xad, 0xe1, 0x12, 0x9b, 0xa6, 0x53, 0x8d, 0x20, 0x27, 0xd5, 0x84, 0x63, + 0xb8, 0x07, 0x4b, 0x5b, 0xa4, 0x1c, 0xa4, 0x1c, 0x17, 0xbf, 0x4b, 0x00, 0x24, 0x04, 0x56, + 0xa8, 0x52, 0xaf, 0x33, 0xf7, 0xad, 0x7c, 0xc8, 0x83, 0x25, 0x13, 0xaf, 0x80, 0x25, 0x6f, + 0xbd, 0xd1, 0x15, 0x69, 0x64, 0x20, 0x7b, 0xd7, 0x33, 0xba, 0x66, 0x29, 0x8a, 0x00, 0xda, + 0x42, 0x07, 0x2c, 0x6c, 0x39, 0x76, 0x9f, 0xdc, 0x17, 0xad, 0xb6, 0x58, 0xdf, 0x5f, 0x00, + 0x18, 0x3a, 0xae, 0x1c, 0xd6, 0x5f, 0x9d, 0x78, 0x8d, 0x73, 0xdd, 0x3e, 0xd6, 0x18, 0x33, + 0x40, 0xe4, 0x36, 0xde, 0xb0, 0xb7, 0x33, 0x2a, 0x6b, 0x08, 0x03, 0x6c, 0x6d, 0x8f, 0x13, + 0x93, 0xd0, 0xd7, 0x87, 0x62, 0x63, 0x53, 0xfb, 0xd8, 0xbb, 0xc9, 0x54, 0x90, 0xd6, 0xa9, + 0x8f, 0xc8, 0x60, 0xbd, 0xec, 0x75, 0x23, 0x9a, 0x21, 0xec, 0xe4, 0x86, 0x43, 0xd7, 0xc1, + 0x88, 0xdc, 0x82, 
0x00, 0x32, 0x79, 0xc9, 0x2b, 0x70, 0x85, 0xb7, 0x25, 0xa1, 0xcc, 0x7d, + 0x0b, 0x29, 0x03, 0xea, 0x80, 0xff, 0x9b, 0xf3, 0x24, 0x7f, 0xd1, 0xff, 0xf0, 0x22, 0x65, + 0x85, 0x99, 0x17, 0x63, 0xc2, 0xc0, 0xb7, 0x62, 0x05, 0xda, 0x7a, 0xa0, 0xc3, 0x2a, 0x6f, + 0x1f, 0xee, 0x1f, 0x31, 0xa8, 0x42, 0x80, 0xe4, 0xb7, 0x6c, 0xf6, 0xac, 0x47, 0xb0, 0x17, + 0x69, 0xcb, 0xff, 0x66, 0x8a, 0xd6, 0x25, 0x00, 0xf3, 0xcf, 0x0a, 0xaf, 0xf8, 0x92, 0x8a, + 0xa0, 0xdf, 0x71, 0x13, 0x8d, 0x9d, 0xff, 0x7e, 0xe0, 0x0a, 0x52, 0xf1, 0x97, 0x01, 0xa9, + 0x73, 0x27, 0xfd, 0x63, 0x58, 0x00, 0x32, 0xa6, 0xf6, 0x78, 0xb8, 0xe4, 0xfd, 0x20, 0x7c, + 0x90, 0xee, 0xad, 0x8c, 0xc9, 0x71, 0x35, 0x66, 0x71, 0x3c, 0xe0, 0xe4, 0x0b, 0xbb, 0xa0, + 0x50, 0xe9, 0xf2, 0x81, 0x1d, 0x3a, 0x95, 0x94, 0x00, 0xd5, 0x49, 0x00, 0x07, 0xdf, 0x21, + 0x53, 0x36, 0x8d, 0x9e, 0xd9, 0xa5, 0x52, 0x4d, 0x0d, 0x29, 0x74, 0xf0, 0x40, 0xbd, 0xda, + 0x63, 0x4e, 0xdd, 0x91, 0x8e, 0xa6, 0xa7, 0xf6, 0x78, 0x58, 0x3b, 0x0a, 0x5c, 0x60, 0x3c, + 0x15, 0x34, 0xf8, 0x2c, 0x21, 0xe3, 0x56, 0x1b, 0x9e, 0xd9, 0x56, 0xd3, 0x13, 0x2e, 0x80, + 0x2c, 0x36, 0xda, 0x1d, 0xc8, 0xfb, 0x52, 0xee, 0x17, 0xb3, 0x2b, 0xf3, 0xd2, 0xeb, 0x29, + 0xa0, 0x37, 0xa0, 0x12, 0xce, 0x1c, 0x50, 0x6a, 0xf4, 0x11, 0xcd, 0x96, 0x88, 0x3f, 0x43, + 0x78, 0xc0, 0x2c, 0x53, 0x6c, 0xa6, 0xdf, 0xb9, 0x9e, 0x93, 0xd4, 0x1e, 0xa9, 0x7f, 0x67, + 0xa6, 0xc1, 0x80, 0x46, 0x0f, 0x63, 0x7d, 0x15, 0xf2, 0x4c, 0xc5, 0xda, 0x11, 0x9a, 0x20, + 0x67, 0x27, 0xe8, 0x00, 0xec, 0x03, 0x1d, 0x15, 0xa7, 0x92, 0xb3, 0x1f, 0xda, 0x20, 0x92, + 0xd8, 0x00, 0xfb, 0x06, 0x80, 0xeb, 0x4b, 0x0c, 0xc1, 0x1f, 0x49, 0x40, 0x06, 0x8d, 0x8a, + 0xf8, 0x34, 0xb1, 0x0c, 0x1d, 0x20, 0xd0, 0x47, 0xe5, 0xb1, 0x7e, 0xf7, 0xe4, 0xb4, 0x7e, + 0x9c, 0x84, 0x18, 0x61, 0x32, 0x4f, 0xc0, 0xc2, 0xb2, 0xcc, 0x63, 0xf6, 0xe1, 0x16, 0xd6, + 0xd9, 0x4b, 0x74, 0x13, 0x01, 0xa1, 0xe2, 0x00, 0xb7, 0x9e, 0xc1, 0x3a, 0xc5, 0xaf, 0xe8, + 0x54, 0x07, 0x2a, 0x20, 0xfd, 0x2c, 0x6f, 0xb9, 0x80, 0x18, 0x92, 0x87, 0xa0, 0x81, 0x24, + 0x60, 0x47, 0x17, 0x4f, 0xbc, 0xbe, 0xf5, 0x03, 0x69, 0x80, 0xe3, 0x10, 0x54, 0xd6, 0x68, + 0x7d, 0x75, 0xd3, 0x0a, 0x45, 0x38, 0x9e, 0xa9, 0xfd, 0x05, 0x40, 0xd2, 0x1e, 0x6f, 0x5c, + 0x30, 0x10, 0xfe, 0x9b, 0x9f, 0x6d, 0xc0, 0x9d, 0x6c, 0x17, 0x7d, 0x00, 0x09, 0xb6, 0x8a, + 0x31, 0x8e, 0x1b, 0x6b, 0x84, 0x1e, 0x79, 0xce, 0x10, 0x55, 0x59, 0x6a, 0x40, 0x16, 0xdc, + 0x9a, 0xcf, 0x4d, 0xb0, 0x8f, 0xac, 0xe3, 0x8d, 0xee, 0xd2, 0xef, 0x01, 0x8c, 0xe0, 0x2b, + 0x24, 0xe5, 0xb4, 0xe1, 0x86, 0x72, 0x00, 0x30, 0x07, 0xce, 0x02, 0x23, 0x41, 0x33, 0x40, + 0xf0, 0x9b, 0xc2, 0x2d, 0x30, 0xec, 0x3b, 0x17, 0xb2, 0x8f, 0x64, 0x7d, 0xcd, 0x70, 0x9e, + 0x80, 0x22, 0xb5, 0xdf, 0x6d, 0x2a, 0x43, 0xd4, 0x2b, 0x5a, 0xf6, 0x96, 0xa6, 0xea, 0x91, + 0x62, 0x80, 0x39, 0xf2, 0x5a, 0x8e, 0xc0, 0xb9, 0x29, 0x99, 0x17, 0xe7, 0x35, 0x2c, 0xf6, + 0x4d, 0x18, 0x00, 0x48, 0x10, 0x85, 0xb4, 0x3f, 0x89, 0x60, 0x49, 0x6e, 0xf0, 0xcd, 0x9d, + 0x92, 0xeb, 0x96, 0x80, 0xcf, 0xf9, 0xf1, 0x46, 0x1d, 0xc0, 0x49, 0xb3, 0x36, 0x2e, 0x24, + 0xc8, 0xdb, 0x41, 0x72, 0x20, 0xf5, 0xde, 0x5c, 0xf9, 0x4a, 0x6e, 0xa0, 0x0b, 0x13, 0xfc, + 0x2d, 0x17, 0x07, 0x16, 0x5e, 0x00, 0x3c, 0x54, 0x41, 0x0e, 0xa2, 0x0d, 0xf3, 0x48, 0x12, + 0x2e, 0x7c, 0xab, 0x3c, 0x59, 0x1c, 0x40, 0xca, 0xb0, 0x71, 0xc7, 0x29, 0xf0, 0xbb, 0x9f, + 0xf4, 0x3f, 0x25, 0x49, 0xad, 0xc2, 0x8f, 0x80, 0x04, 0x38, 0x6d, 0x35, 0x02, 0xca, 0xe6, + 0x02, 0x83, 0x89, 0x4e, 0x74, 0xdb, 0x08, 0x5a, 0x80, 0x13, 0x99, 0xd4, 0x26, 0xc1, 0x27, + 0xce, 0xb0, 0x98, 0x99, 0xca, 0xf6, 0x3e, 0x50, 0x49, 0xd0, 0xbf, 0xcb, 
0x6f, 0xbe, 0x5b, + 0x92, 0x63, 0xde, 0x94, 0xd3, 0x8f, 0x07, 0x06, 0x0f, 0x2b, 0x80, 0x36, 0xf1, 0x77, 0xf6, + 0x29, 0x33, 0x13, 0xa9, 0x4a, 0x55, 0x3d, 0x6c, 0xca, 0xdb, 0x4e, 0x40, 0xc4, 0x95, 0x54, + 0xf4, 0xe2, 0x8c, 0x1b, 0xa0, 0xfe, 0x30, 0x50, 0x9d, 0x62, 0xbc, 0x5c, 0x00, 0xb4, 0xc4, + 0xb3, 0x57, 0xa5, 0x79, 0x3b, 0x85, 0xf6, 0x75, 0xdd, 0xc0, 0x00, 0x00, 0x00, 0x01, 0x4b, + 0x3b, 0x4c, 0xa8, 0x5a, 0x86, 0xc4, 0x7a, 0x09, 0x8a, 0x22, 0x3f, 0xff, 0xff, 0xff, 0xff, + 0x15, 0x02, 0x19, 0x4c, 0x48, 0x0c, 0x73, 0x70, 0x61, 0x72, 0x6b, 0x5f, 0x73, 0x63, 0x68, + 0x65, 0x6d, 0x61, 0x15, 0x06, 0x00, 0x15, 0x02, 0x25, 0x02, 0x18, 0x06, 0x64, 0x65, 0x63, + 0x37, 0x70, 0x34, 0x25, 0x0a, 0x15, 0x08, 0x15, 0x0e, 0x00, 0x15, 0x04, 0x25, 0x02, 0x18, + 0x07, 0x64, 0x65, 0x63, 0x31, 0x34, 0x70, 0x35, 0x25, 0x0a, 0x15, 0x0a, 0x15, 0x1c, 0x00, + 0x15, 0x0e, 0x15, 0x20, 0x15, 0x02, 0x18, 0x08, 0x64, 0x65, 0x63, 0x33, 0x38, 0x70, 0x31, + 0x38, 0x25, 0x0a, 0x15, 0x24, 0x15, 0x4c, 0x00, 0x16, 0x6a, 0x19, 0x1c, 0x19, 0x3c, 0x26, + 0x08, 0x1c, 0x15, 0x02, 0x19, 0x35, 0x06, 0x08, 0x00, 0x19, 0x18, 0x06, 0x64, 0x65, 0x63, + 0x37, 0x70, 0x34, 0x15, 0x02, 0x16, 0x6a, 0x16, 0xf6, 0x03, 0x16, 0xfe, 0x03, 0x26, 0x08, + 0x3c, 0x36, 0x02, 0x28, 0x04, 0x7f, 0x96, 0x98, 0x00, 0x18, 0x04, 0x81, 0x69, 0x67, 0xff, + 0x00, 0x19, 0x1c, 0x15, 0x00, 0x15, 0x00, 0x15, 0x02, 0x00, 0x00, 0x00, 0x26, 0x86, 0x04, + 0x1c, 0x15, 0x04, 0x19, 0x35, 0x06, 0x08, 0x00, 0x19, 0x18, 0x07, 0x64, 0x65, 0x63, 0x31, + 0x34, 0x70, 0x35, 0x15, 0x02, 0x16, 0x6a, 0x16, 0xa6, 0x07, 0x16, 0xb0, 0x07, 0x26, 0x86, + 0x04, 0x3c, 0x36, 0x02, 0x28, 0x08, 0xff, 0x3f, 0x7a, 0x10, 0xf3, 0x5a, 0x00, 0x00, 0x18, + 0x08, 0x01, 0xc0, 0x85, 0xef, 0x0c, 0xa5, 0xff, 0xff, 0x00, 0x19, 0x1c, 0x15, 0x00, 0x15, + 0x00, 0x15, 0x02, 0x00, 0x00, 0x00, 0x26, 0xb6, 0x0b, 0x1c, 0x15, 0x0e, 0x19, 0x35, 0x06, + 0x08, 0x00, 0x19, 0x18, 0x08, 0x64, 0x65, 0x63, 0x33, 0x38, 0x70, 0x31, 0x38, 0x15, 0x02, + 0x16, 0x6a, 0x16, 0x86, 0x0e, 0x16, 0x90, 0x0e, 0x26, 0xb6, 0x0b, 0x3c, 0x36, 0x02, 0x28, + 0x10, 0x4b, 0x3b, 0x4c, 0xa8, 0x5a, 0x86, 0xc4, 0x7a, 0x09, 0x8a, 0x22, 0x3f, 0xff, 0xff, + 0xff, 0xff, 0x18, 0x10, 0xb4, 0xc4, 0xb3, 0x57, 0xa5, 0x79, 0x3b, 0x85, 0xf6, 0x75, 0xdd, + 0xc0, 0x00, 0x00, 0x00, 0x01, 0x00, 0x19, 0x1c, 0x15, 0x00, 0x15, 0x00, 0x15, 0x02, 0x00, + 0x00, 0x00, 0x16, 0xa2, 0x19, 0x16, 0x6a, 0x00, 0x19, 0x2c, 0x18, 0x18, 0x6f, 0x72, 0x67, + 0x2e, 0x61, 0x70, 0x61, 0x63, 0x68, 0x65, 0x2e, 0x73, 0x70, 0x61, 0x72, 0x6b, 0x2e, 0x76, + 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x05, 0x33, 0x2e, 0x30, 0x2e, 0x31, 0x00, 0x18, + 0x29, 0x6f, 0x72, 0x67, 0x2e, 0x61, 0x70, 0x61, 0x63, 0x68, 0x65, 0x2e, 0x73, 0x70, 0x61, + 0x72, 0x6b, 0x2e, 0x73, 0x71, 0x6c, 0x2e, 0x70, 0x61, 0x72, 0x71, 0x75, 0x65, 0x74, 0x2e, + 0x72, 0x6f, 0x77, 0x2e, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0xf4, 0x01, + 0x7b, 0x22, 0x74, 0x79, 0x70, 0x65, 0x22, 0x3a, 0x22, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, + 0x22, 0x2c, 0x22, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x22, 0x3a, 0x5b, 0x7b, 0x22, 0x6e, + 0x61, 0x6d, 0x65, 0x22, 0x3a, 0x22, 0x64, 0x65, 0x63, 0x37, 0x70, 0x34, 0x22, 0x2c, 0x22, + 0x74, 0x79, 0x70, 0x65, 0x22, 0x3a, 0x22, 0x64, 0x65, 0x63, 0x69, 0x6d, 0x61, 0x6c, 0x28, + 0x37, 0x2c, 0x34, 0x29, 0x22, 0x2c, 0x22, 0x6e, 0x75, 0x6c, 0x6c, 0x61, 0x62, 0x6c, 0x65, + 0x22, 0x3a, 0x74, 0x72, 0x75, 0x65, 0x2c, 0x22, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, + 0x61, 0x22, 0x3a, 0x7b, 0x7d, 0x7d, 0x2c, 0x7b, 0x22, 0x6e, 0x61, 0x6d, 0x65, 0x22, 0x3a, + 0x22, 0x64, 0x65, 0x63, 0x31, 0x34, 
0x70, 0x35, 0x22, 0x2c, 0x22, 0x74, 0x79, 0x70, 0x65, + 0x22, 0x3a, 0x22, 0x64, 0x65, 0x63, 0x69, 0x6d, 0x61, 0x6c, 0x28, 0x31, 0x34, 0x2c, 0x35, + 0x29, 0x22, 0x2c, 0x22, 0x6e, 0x75, 0x6c, 0x6c, 0x61, 0x62, 0x6c, 0x65, 0x22, 0x3a, 0x74, + 0x72, 0x75, 0x65, 0x2c, 0x22, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x22, 0x3a, + 0x7b, 0x7d, 0x7d, 0x2c, 0x7b, 0x22, 0x6e, 0x61, 0x6d, 0x65, 0x22, 0x3a, 0x22, 0x64, 0x65, + 0x63, 0x33, 0x38, 0x70, 0x31, 0x38, 0x22, 0x2c, 0x22, 0x74, 0x79, 0x70, 0x65, 0x22, 0x3a, + 0x22, 0x64, 0x65, 0x63, 0x69, 0x6d, 0x61, 0x6c, 0x28, 0x33, 0x38, 0x2c, 0x31, 0x38, 0x29, + 0x22, 0x2c, 0x22, 0x6e, 0x75, 0x6c, 0x6c, 0x61, 0x62, 0x6c, 0x65, 0x22, 0x3a, 0x74, 0x72, + 0x75, 0x65, 0x2c, 0x22, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x22, 0x3a, 0x7b, + 0x7d, 0x7d, 0x5d, 0x7d, 0x00, 0x18, 0x4a, 0x70, 0x61, 0x72, 0x71, 0x75, 0x65, 0x74, 0x2d, + 0x6d, 0x72, 0x20, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x20, 0x31, 0x2e, 0x31, 0x30, + 0x2e, 0x31, 0x20, 0x28, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x20, 0x61, 0x38, 0x39, 0x64, 0x66, + 0x38, 0x66, 0x39, 0x39, 0x33, 0x32, 0x62, 0x36, 0x65, 0x66, 0x36, 0x36, 0x33, 0x33, 0x64, + 0x30, 0x36, 0x30, 0x36, 0x39, 0x65, 0x35, 0x30, 0x63, 0x39, 0x62, 0x37, 0x39, 0x37, 0x30, + 0x62, 0x65, 0x62, 0x64, 0x31, 0x29, 0x19, 0x3c, 0x1c, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x1c, + 0x00, 0x00, 0x00, 0xd3, 0x02, 0x00, 0x00, 0x50, 0x41, 0x52, 0x31}; + unsigned int decimals_parquet_len = 2366; + + cudf::io::parquet_reader_options read_opts = cudf::io::parquet_reader_options::builder( + cudf::io::source_info{reinterpret_cast(decimals_parquet), decimals_parquet_len}); + auto result = cudf::io::read_parquet(read_opts); + + auto validity = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 50; }); + + EXPECT_EQ(result.tbl->view().num_columns(), 3); + + int32_t col0_data[] = { + -2354584, -190275, 8393572, 6446515, -5687920, -1843550, -6897687, -6780385, 3428529, + 5842056, -4312278, -4450603, -7516141, 2974667, -4288640, 1065090, -9410428, 7891355, + 1076244, -1975984, 6999466, 2666959, 9262967, 7931374, -1370640, 451074, 8799111, + 3026424, -6803730, 5098297, -1414370, -9662848, 2499991, 658765, 8348874, -6177036, + -9694494, -5343299, 3558393, -8789072, 2697890, -4454707, 8299309, -6223703, -3112513, + 7537487, 825776, -495683, 328299, -4529727, 0, -9999999, 9999999}; + + EXPECT_EQ(static_cast(result.tbl->view().column(0).size()), + sizeof(col0_data) / sizeof(col0_data[0])); + cudf::test::fixed_point_column_wrapper col0( + std::begin(col0_data), std::end(col0_data), validity, numeric::scale_type{-4}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), col0); + + int64_t col1_data[] = {29274040266581, -17210335917753, -58420730139037, + 68073792696254, 2236456014294, 13704555677045, + -70797090469548, -52248605513407, -68976081919961, + -34277313883112, 97774730521689, 21184241014572, + -670882460254, -40862944054399, -24079852370612, + -88670167797498, -84007574359403, -71843004533519, + -55538016554201, 3491435293032, -29085437167297, + 36901882672273, -98622066122568, -13974902998457, + 86712597643378, -16835133643735, -94759096142232, + 30708340810940, 79086853262082, 78923696440892, + -76316597208589, 37247268714759, 80303592631774, + 57790350050889, 19387319851064, -33186875066145, + 69701203023404, -7157433049060, -7073790423437, + 92769171617714, -75127120182184, -951893180618, + 64927618310150, -53875897154023, -16168039035569, + -24273449166429, -30359781249192, 35639397345991, + 45844829680593, 
71401416837149, 0, + -99999999999999, 99999999999999}; + + EXPECT_EQ(static_cast(result.tbl->view().column(1).size()), + sizeof(col1_data) / sizeof(col1_data[0])); + cudf::test::fixed_point_column_wrapper col1( + std::begin(col1_data), std::end(col1_data), validity, numeric::scale_type{-5}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), col1); + + cudf::io::parquet_reader_options read_strict_opts = read_opts; + read_strict_opts.set_columns({"dec7p4", "dec14p5"}); + EXPECT_NO_THROW(cudf::io::read_parquet(read_strict_opts)); + } + { + // dec7p3: Decimal(precision=7, scale=3) backed by FIXED_LENGTH_BYTE_ARRAY(length = 4) + // dec12p11: Decimal(precision=12, scale=11) backed by FIXED_LENGTH_BYTE_ARRAY(length = 6) + // dec20p1: Decimal(precision=20, scale=1) backed by FIXED_LENGTH_BYTE_ARRAY(length = 9) + const unsigned char fixed_len_bytes_decimal_parquet[] = { + 0x50, 0x41, 0x52, 0x31, 0x15, 0x00, 0x15, 0xA8, 0x01, 0x15, 0xAE, 0x01, 0x2C, 0x15, 0x28, + 0x15, 0x00, 0x15, 0x06, 0x15, 0x08, 0x1C, 0x36, 0x02, 0x28, 0x04, 0x00, 0x97, 0x45, 0x72, + 0x18, 0x04, 0x00, 0x01, 0x81, 0x3B, 0x00, 0x00, 0x00, 0x54, 0xF0, 0x53, 0x04, 0x00, 0x00, + 0x00, 0x26, 0x01, 0x03, 0x00, 0x00, 0x61, 0x10, 0xCF, 0x00, 0x0A, 0xA9, 0x08, 0x00, 0x77, + 0x58, 0x6F, 0x00, 0x6B, 0xEE, 0xA4, 0x00, 0x92, 0xF8, 0x94, 0x00, 0x2E, 0x18, 0xD4, 0x00, + 0x4F, 0x45, 0x33, 0x00, 0x97, 0x45, 0x72, 0x00, 0x0D, 0xC2, 0x75, 0x00, 0x76, 0xAA, 0xAA, + 0x00, 0x30, 0x9F, 0x86, 0x00, 0x4B, 0x9D, 0xB1, 0x00, 0x4E, 0x4B, 0x3B, 0x00, 0x01, 0x81, + 0x3B, 0x00, 0x22, 0xD4, 0x53, 0x00, 0x72, 0xC4, 0xAF, 0x00, 0x43, 0x9B, 0x72, 0x00, 0x1D, + 0x91, 0xC3, 0x00, 0x45, 0x27, 0x48, 0x15, 0x00, 0x15, 0xF4, 0x01, 0x15, 0xFA, 0x01, 0x2C, + 0x15, 0x28, 0x15, 0x00, 0x15, 0x06, 0x15, 0x08, 0x1C, 0x36, 0x02, 0x28, 0x06, 0x00, 0xD5, + 0xD7, 0x31, 0x99, 0xA6, 0x18, 0x06, 0xFF, 0x17, 0x2B, 0x5A, 0xF0, 0x01, 0x00, 0x00, 0x00, + 0x7A, 0xF0, 0x79, 0x04, 0x00, 0x00, 0x00, 0x24, 0x01, 0x03, 0x02, 0x00, 0x54, 0x23, 0xCF, + 0x13, 0x0A, 0x00, 0x07, 0x22, 0xB1, 0x21, 0x7E, 0x00, 0x64, 0x19, 0xD6, 0xD2, 0xA5, 0x00, + 0x61, 0x7F, 0xF6, 0xB9, 0xB0, 0x00, 0xD0, 0x7F, 0x9C, 0xA9, 0xE9, 0x00, 0x65, 0x58, 0xF0, + 0xAD, 0xFB, 0x00, 0xBC, 0x61, 0xE2, 0x03, 0xDA, 0xFF, 0x17, 0x2B, 0x5A, 0xF0, 0x01, 0x00, + 0x63, 0x4B, 0x4C, 0xFE, 0x45, 0x00, 0x7A, 0xA0, 0xD8, 0xD1, 0xC0, 0x00, 0xC0, 0x63, 0xF7, + 0x9D, 0x0A, 0x00, 0x88, 0x22, 0x0F, 0x1B, 0x25, 0x00, 0x1A, 0x80, 0x56, 0x34, 0xC7, 0x00, + 0x5F, 0x48, 0x61, 0x09, 0x7C, 0x00, 0x61, 0xEF, 0x92, 0x42, 0x2F, 0x00, 0xD5, 0xD7, 0x31, + 0x99, 0xA6, 0xFF, 0x17, 0x2B, 0x5A, 0xF0, 0x01, 0x00, 0x71, 0xDD, 0xE2, 0x22, 0x7B, 0x00, + 0x54, 0xBF, 0xAE, 0xE9, 0x3C, 0x15, 0x00, 0x15, 0xD4, 0x02, 0x15, 0xDC, 0x02, 0x2C, 0x15, + 0x28, 0x15, 0x00, 0x15, 0x06, 0x15, 0x08, 0x1C, 0x36, 0x04, 0x28, 0x09, 0x00, 0x7D, 0xFE, + 0x02, 0xDA, 0xB2, 0x62, 0xA3, 0xFB, 0x18, 0x09, 0x00, 0x03, 0x9C, 0xCD, 0x5A, 0xAC, 0xBB, + 0xF1, 0xE3, 0x00, 0x00, 0x00, 0xAA, 0x01, 0xF0, 0xA9, 0x04, 0x00, 0x00, 0x00, 0x07, 0xBF, + 0xBF, 0x0F, 0x00, 0x7D, 0xFE, 0x02, 0xDA, 0xB2, 0x62, 0xA3, 0xFB, 0x00, 0x7D, 0x9A, 0xCB, + 0xDA, 0x4B, 0x10, 0x8B, 0xAC, 0x00, 0x20, 0xBA, 0x97, 0x87, 0x2E, 0x3B, 0x4E, 0x04, 0x00, + 0x15, 0xBB, 0xC2, 0xDF, 0x2D, 0x25, 0x08, 0xB6, 0x00, 0x5C, 0x67, 0x0E, 0x36, 0x30, 0xF1, + 0xAC, 0xA4, 0x00, 0x44, 0xF1, 0x8E, 0xFB, 0x17, 0x5E, 0xE1, 0x96, 0x00, 0x64, 0x69, 0xF9, + 0x66, 0x3F, 0x11, 0xED, 0xB9, 0x00, 0x45, 0xB5, 0xDA, 0x14, 0x9C, 0xA3, 0xFA, 0x64, 0x00, + 0x26, 0x5F, 0xDE, 0xD7, 0x67, 0x95, 0xEF, 0xB1, 0x00, 0x35, 0xDB, 0x9B, 0x88, 0x46, 0xD0, 
+ 0xA1, 0x0E, 0x00, 0x45, 0xA9, 0x92, 0x8E, 0x89, 0xD1, 0xAC, 0x4C, 0x00, 0x4C, 0xF1, 0xCB, + 0x27, 0x82, 0x3A, 0x7D, 0xB7, 0x00, 0x64, 0xD3, 0xD2, 0x2F, 0x9C, 0x83, 0x16, 0x75, 0x00, + 0x15, 0xDF, 0xC2, 0xA9, 0x63, 0xB8, 0x33, 0x65, 0x00, 0x27, 0x40, 0x28, 0x97, 0x05, 0x8E, + 0xE3, 0x46, 0x00, 0x03, 0x9C, 0xCD, 0x5A, 0xAC, 0xBB, 0xF1, 0xE3, 0x00, 0x22, 0x23, 0xF5, + 0xE8, 0x9D, 0x55, 0xD4, 0x9C, 0x00, 0x25, 0xB9, 0xD8, 0x87, 0x2D, 0xF1, 0xF2, 0x17, 0x15, + 0x02, 0x19, 0x4C, 0x48, 0x0C, 0x73, 0x70, 0x61, 0x72, 0x6B, 0x5F, 0x73, 0x63, 0x68, 0x65, + 0x6D, 0x61, 0x15, 0x06, 0x00, 0x15, 0x0E, 0x15, 0x08, 0x15, 0x02, 0x18, 0x06, 0x64, 0x65, + 0x63, 0x37, 0x70, 0x33, 0x25, 0x0A, 0x15, 0x06, 0x15, 0x0E, 0x00, 0x15, 0x0E, 0x15, 0x0C, + 0x15, 0x02, 0x18, 0x08, 0x64, 0x65, 0x63, 0x31, 0x32, 0x70, 0x31, 0x31, 0x25, 0x0A, 0x15, + 0x16, 0x15, 0x18, 0x00, 0x15, 0x0E, 0x15, 0x12, 0x15, 0x02, 0x18, 0x07, 0x64, 0x65, 0x63, + 0x32, 0x30, 0x70, 0x31, 0x25, 0x0A, 0x15, 0x02, 0x15, 0x28, 0x00, 0x16, 0x28, 0x19, 0x1C, + 0x19, 0x3C, 0x26, 0x08, 0x1C, 0x15, 0x0E, 0x19, 0x35, 0x06, 0x08, 0x00, 0x19, 0x18, 0x06, + 0x64, 0x65, 0x63, 0x37, 0x70, 0x33, 0x15, 0x02, 0x16, 0x28, 0x16, 0xEE, 0x01, 0x16, 0xF4, + 0x01, 0x26, 0x08, 0x3C, 0x36, 0x02, 0x28, 0x04, 0x00, 0x97, 0x45, 0x72, 0x18, 0x04, 0x00, + 0x01, 0x81, 0x3B, 0x00, 0x19, 0x1C, 0x15, 0x00, 0x15, 0x00, 0x15, 0x02, 0x00, 0x00, 0x00, + 0x26, 0xFC, 0x01, 0x1C, 0x15, 0x0E, 0x19, 0x35, 0x06, 0x08, 0x00, 0x19, 0x18, 0x08, 0x64, + 0x65, 0x63, 0x31, 0x32, 0x70, 0x31, 0x31, 0x15, 0x02, 0x16, 0x28, 0x16, 0xC2, 0x02, 0x16, + 0xC8, 0x02, 0x26, 0xFC, 0x01, 0x3C, 0x36, 0x02, 0x28, 0x06, 0x00, 0xD5, 0xD7, 0x31, 0x99, + 0xA6, 0x18, 0x06, 0xFF, 0x17, 0x2B, 0x5A, 0xF0, 0x01, 0x00, 0x19, 0x1C, 0x15, 0x00, 0x15, + 0x00, 0x15, 0x02, 0x00, 0x00, 0x00, 0x26, 0xC4, 0x04, 0x1C, 0x15, 0x0E, 0x19, 0x35, 0x06, + 0x08, 0x00, 0x19, 0x18, 0x07, 0x64, 0x65, 0x63, 0x32, 0x30, 0x70, 0x31, 0x15, 0x02, 0x16, + 0x28, 0x16, 0xAE, 0x03, 0x16, 0xB6, 0x03, 0x26, 0xC4, 0x04, 0x3C, 0x36, 0x04, 0x28, 0x09, + 0x00, 0x7D, 0xFE, 0x02, 0xDA, 0xB2, 0x62, 0xA3, 0xFB, 0x18, 0x09, 0x00, 0x03, 0x9C, 0xCD, + 0x5A, 0xAC, 0xBB, 0xF1, 0xE3, 0x00, 0x19, 0x1C, 0x15, 0x00, 0x15, 0x00, 0x15, 0x02, 0x00, + 0x00, 0x00, 0x16, 0xDE, 0x07, 0x16, 0x28, 0x00, 0x19, 0x2C, 0x18, 0x18, 0x6F, 0x72, 0x67, + 0x2E, 0x61, 0x70, 0x61, 0x63, 0x68, 0x65, 0x2E, 0x73, 0x70, 0x61, 0x72, 0x6B, 0x2E, 0x76, + 0x65, 0x72, 0x73, 0x69, 0x6F, 0x6E, 0x18, 0x05, 0x33, 0x2E, 0x30, 0x2E, 0x31, 0x00, 0x18, + 0x29, 0x6F, 0x72, 0x67, 0x2E, 0x61, 0x70, 0x61, 0x63, 0x68, 0x65, 0x2E, 0x73, 0x70, 0x61, + 0x72, 0x6B, 0x2E, 0x73, 0x71, 0x6C, 0x2E, 0x70, 0x61, 0x72, 0x71, 0x75, 0x65, 0x74, 0x2E, + 0x72, 0x6F, 0x77, 0x2E, 0x6D, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0xF4, 0x01, + 0x7B, 0x22, 0x74, 0x79, 0x70, 0x65, 0x22, 0x3A, 0x22, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, + 0x22, 0x2C, 0x22, 0x66, 0x69, 0x65, 0x6C, 0x64, 0x73, 0x22, 0x3A, 0x5B, 0x7B, 0x22, 0x6E, + 0x61, 0x6D, 0x65, 0x22, 0x3A, 0x22, 0x64, 0x65, 0x63, 0x37, 0x70, 0x33, 0x22, 0x2C, 0x22, + 0x74, 0x79, 0x70, 0x65, 0x22, 0x3A, 0x22, 0x64, 0x65, 0x63, 0x69, 0x6D, 0x61, 0x6C, 0x28, + 0x37, 0x2C, 0x33, 0x29, 0x22, 0x2C, 0x22, 0x6E, 0x75, 0x6C, 0x6C, 0x61, 0x62, 0x6C, 0x65, + 0x22, 0x3A, 0x74, 0x72, 0x75, 0x65, 0x2C, 0x22, 0x6D, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, + 0x61, 0x22, 0x3A, 0x7B, 0x7D, 0x7D, 0x2C, 0x7B, 0x22, 0x6E, 0x61, 0x6D, 0x65, 0x22, 0x3A, + 0x22, 0x64, 0x65, 0x63, 0x31, 0x32, 0x70, 0x31, 0x31, 0x22, 0x2C, 0x22, 0x74, 0x79, 0x70, + 0x65, 0x22, 0x3A, 0x22, 0x64, 0x65, 0x63, 0x69, 0x6D, 
0x61, 0x6C, 0x28, 0x31, 0x32, 0x2C, + 0x31, 0x31, 0x29, 0x22, 0x2C, 0x22, 0x6E, 0x75, 0x6C, 0x6C, 0x61, 0x62, 0x6C, 0x65, 0x22, + 0x3A, 0x74, 0x72, 0x75, 0x65, 0x2C, 0x22, 0x6D, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, + 0x22, 0x3A, 0x7B, 0x7D, 0x7D, 0x2C, 0x7B, 0x22, 0x6E, 0x61, 0x6D, 0x65, 0x22, 0x3A, 0x22, + 0x64, 0x65, 0x63, 0x32, 0x30, 0x70, 0x31, 0x22, 0x2C, 0x22, 0x74, 0x79, 0x70, 0x65, 0x22, + 0x3A, 0x22, 0x64, 0x65, 0x63, 0x69, 0x6D, 0x61, 0x6C, 0x28, 0x32, 0x30, 0x2C, 0x31, 0x29, + 0x22, 0x2C, 0x22, 0x6E, 0x75, 0x6C, 0x6C, 0x61, 0x62, 0x6C, 0x65, 0x22, 0x3A, 0x74, 0x72, + 0x75, 0x65, 0x2C, 0x22, 0x6D, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x22, 0x3A, 0x7B, + 0x7D, 0x7D, 0x5D, 0x7D, 0x00, 0x18, 0x4A, 0x70, 0x61, 0x72, 0x71, 0x75, 0x65, 0x74, 0x2D, + 0x6D, 0x72, 0x20, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6F, 0x6E, 0x20, 0x31, 0x2E, 0x31, 0x30, + 0x2E, 0x31, 0x20, 0x28, 0x62, 0x75, 0x69, 0x6C, 0x64, 0x20, 0x61, 0x38, 0x39, 0x64, 0x66, + 0x38, 0x66, 0x39, 0x39, 0x33, 0x32, 0x62, 0x36, 0x65, 0x66, 0x36, 0x36, 0x33, 0x33, 0x64, + 0x30, 0x36, 0x30, 0x36, 0x39, 0x65, 0x35, 0x30, 0x63, 0x39, 0x62, 0x37, 0x39, 0x37, 0x30, + 0x62, 0x65, 0x62, 0x64, 0x31, 0x29, 0x19, 0x3C, 0x1C, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x1C, + 0x00, 0x00, 0x00, 0xC5, 0x02, 0x00, 0x00, 0x50, 0x41, 0x52, 0x31, + }; + + unsigned int parquet_len = 1226; + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{ + reinterpret_cast(fixed_len_bytes_decimal_parquet), parquet_len}); + auto result = cudf::io::read_parquet(read_opts); + EXPECT_EQ(result.tbl->view().num_columns(), 3); + + auto validity_c0 = cudf::test::iterators::nulls_at({19}); + int32_t col0_data[] = {6361295, 698632, 7821423, 7073444, 9631892, 3021012, 5195059, + 9913714, 901749, 7776938, 3186566, 4955569, 5131067, 98619, + 2282579, 7521455, 4430706, 1937859, 4532040, 0}; + + EXPECT_EQ(static_cast(result.tbl->view().column(0).size()), + sizeof(col0_data) / sizeof(col0_data[0])); + cudf::test::fixed_point_column_wrapper col0( + std::begin(col0_data), std::end(col0_data), validity_c0, numeric::scale_type{-3}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), col0); + + auto validity_c1 = cudf::test::iterators::nulls_at({18}); + int64_t col1_data[] = {361378026250, + 30646804862, + 429930238629, + 418758703536, + 895494171113, + 435283865083, + 809096053722, + -999999999999, + 426465099333, + 526684574144, + 826310892810, + 584686967589, + 113822282951, + 409236212092, + 420631167535, + 918438386086, + -999999999999, + 489053889147, + 0, + 363993164092}; + + EXPECT_EQ(static_cast(result.tbl->view().column(1).size()), + sizeof(col1_data) / sizeof(col1_data[0])); + cudf::test::fixed_point_column_wrapper col1( + std::begin(col1_data), std::end(col1_data), validity_c1, numeric::scale_type{-11}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), col1); + + auto validity_c2 = cudf::test::iterators::nulls_at({6, 14}); + __int128_t col2_data[] = {9078697037144433659, + 9050770539577117612, + 2358363961733893636, + 1566059559232276662, + 6658306200002735268, + 4967909073046397334, + 0, + 7235588493887532473, + 5023160741463849572, + 2765173712965988273, + 3880866513515749646, + 5019704400576359500, + 5544435986818825655, + 7265381725809874549, + 0, + 1576192427381240677, + 2828305195087094598, + 260308667809395171, + 2460080200895288476, + 2718441925197820439}; + + EXPECT_EQ(static_cast(result.tbl->view().column(2).size()), + sizeof(col2_data) / sizeof(col2_data[0])); + 
cudf::test::fixed_point_column_wrapper<__int128_t> col2( + std::begin(col2_data), std::end(col2_data), validity_c2, numeric::scale_type{-1}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(2), col2); + } +} + +TEST_F(ParquetReaderTest, EmptyOutput) +{ + cudf::test::fixed_width_column_wrapper c0; + cudf::test::strings_column_wrapper c1; + cudf::test::fixed_point_column_wrapper c2({}, numeric::scale_type{2}); + cudf::test::lists_column_wrapper _c3{{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}}; + auto c3 = cudf::empty_like(_c3); + + cudf::test::fixed_width_column_wrapper sc0; + cudf::test::strings_column_wrapper sc1; + cudf::test::lists_column_wrapper _sc2{{1, 2}}; + std::vector> struct_children; + struct_children.push_back(sc0.release()); + struct_children.push_back(sc1.release()); + struct_children.push_back(cudf::empty_like(_sc2)); + cudf::test::structs_column_wrapper c4(std::move(struct_children)); + + table_view expected({c0, c1, c2, *c3, c4}); + + // set precision on the decimal column + cudf::io::table_input_metadata expected_metadata(expected); + expected_metadata.column_metadata[2].set_decimal_precision(1); + + auto filepath = temp_env->get_temp_filepath("EmptyOutput.parquet"); + cudf::io::parquet_writer_options out_args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); + out_args.set_metadata(&expected_metadata); + cudf::io::write_parquet(out_args); + + cudf::io::parquet_reader_options read_args = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto result = cudf::io::read_parquet(read_args); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); +} + +TEST_F(ParquetWriterTest, RowGroupSizeInvalid) +{ + const auto unused_table = std::make_unique
();
+  std::vector<char> out_buffer;
+
+  EXPECT_THROW(cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer),
+                                                         unused_table->view())
+                 .row_group_size_rows(4999),
+               cudf::logic_error);
+  EXPECT_THROW(cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer),
+                                                         unused_table->view())
+                 .max_page_size_rows(4999),
+               cudf::logic_error);
+  EXPECT_THROW(cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer),
+                                                         unused_table->view())
+                 .row_group_size_bytes(3 << 10),
+               cudf::logic_error);
+  EXPECT_THROW(cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer),
+                                                         unused_table->view())
+                 .max_page_size_bytes(3 << 10),
+               cudf::logic_error);
+
+  EXPECT_THROW(cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info(&out_buffer))
+                 .row_group_size_rows(4999),
+               cudf::logic_error);
+  EXPECT_THROW(cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info(&out_buffer))
+                 .max_page_size_rows(4999),
+               cudf::logic_error);
+  EXPECT_THROW(cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info(&out_buffer))
+                 .row_group_size_bytes(3 << 10),
+               cudf::logic_error);
+  EXPECT_THROW(cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info(&out_buffer))
+                 .max_page_size_bytes(3 << 10),
+               cudf::logic_error);
+}
+
+TEST_F(ParquetWriterTest, RowGroupPageSizeMatch)
+{
+  const auto unused_table = std::make_unique();
+  std::vector<char> out_buffer;
+
+  auto options = cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer),
+                                                           unused_table->view())
+                   .row_group_size_bytes(128 * 1024)
+                   .max_page_size_bytes(512 * 1024)
+                   .row_group_size_rows(10000)
+                   .max_page_size_rows(20000)
+                   .build();
+  EXPECT_EQ(options.get_row_group_size_bytes(), options.get_max_page_size_bytes());
+  EXPECT_EQ(options.get_row_group_size_rows(), options.get_max_page_size_rows());
+}
+
+TEST_F(ParquetChunkedWriterTest, RowGroupPageSizeMatch)
+{
+  std::vector<char> out_buffer;
+
+  auto options = cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info(&out_buffer))
+                   .row_group_size_bytes(128 * 1024)
+                   .max_page_size_bytes(512 * 1024)
+                   .row_group_size_rows(10000)
+                   .max_page_size_rows(20000)
+                   .build();
+  EXPECT_EQ(options.get_row_group_size_bytes(), options.get_max_page_size_bytes());
+  EXPECT_EQ(options.get_row_group_size_rows(), options.get_max_page_size_rows());
+}
+
+TEST_F(ParquetWriterTest, EmptyList)
+{
+  auto L1 = cudf::make_lists_column(0,
+                                    cudf::make_empty_column(cudf::data_type(cudf::type_id::INT32)),
+                                    cudf::make_empty_column(cudf::data_type{cudf::type_id::INT64}),
+                                    0,
+                                    {});
+  auto L0 = cudf::make_lists_column(
+    3, cudf::test::fixed_width_column_wrapper{0, 0, 0, 0}.release(), std::move(L1), 0, {});
+
+  auto filepath = temp_env->get_temp_filepath("EmptyList.parquet");
+  cudf::io::write_parquet(cudf::io::parquet_writer_options_builder(cudf::io::sink_info(filepath),
+                                                                   cudf::table_view({*L0})));
+
+  auto result = cudf::io::read_parquet(
+    cudf::io::parquet_reader_options_builder(cudf::io::source_info(filepath)));
+
+  using lcw = cudf::test::lists_column_wrapper;
+  auto expected = lcw{lcw{}, lcw{}, lcw{}};
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), expected);
+}
+
+TEST_F(ParquetWriterTest, DeepEmptyList)
+{
+  // Make a list column LLLi such that only L is valid and LLi are all null.
This tests whether we can + // handle multiple nullptr offsets + + auto L2 = cudf::make_lists_column(0, + cudf::make_empty_column(cudf::data_type(cudf::type_id::INT32)), + cudf::make_empty_column(cudf::data_type{cudf::type_id::INT64}), + 0, + {}); + auto L1 = cudf::make_lists_column( + 0, cudf::make_empty_column(cudf::data_type(cudf::type_id::INT32)), std::move(L2), 0, {}); + auto L0 = cudf::make_lists_column( + 3, cudf::test::fixed_width_column_wrapper{0, 0, 0, 0}.release(), std::move(L1), 0, {}); + + auto filepath = temp_env->get_temp_filepath("DeepEmptyList.parquet"); + cudf::io::write_parquet(cudf::io::parquet_writer_options_builder(cudf::io::sink_info(filepath), + cudf::table_view({*L0}))); + + auto result = cudf::io::read_parquet( + cudf::io::parquet_reader_options_builder(cudf::io::source_info(filepath))); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), *L0); +} + +TEST_F(ParquetWriterTest, EmptyListWithStruct) +{ + auto L2 = cudf::make_lists_column(0, + cudf::make_empty_column(cudf::data_type(cudf::type_id::INT32)), + cudf::make_empty_column(cudf::data_type{cudf::type_id::INT64}), + 0, + {}); + + auto children = std::vector>{}; + children.push_back(std::move(L2)); + auto S2 = cudf::make_structs_column(0, std::move(children), 0, {}); + auto L1 = cudf::make_lists_column( + 0, cudf::make_empty_column(cudf::data_type(cudf::type_id::INT32)), std::move(S2), 0, {}); + auto L0 = cudf::make_lists_column( + 3, cudf::test::fixed_width_column_wrapper{0, 0, 0, 0}.release(), std::move(L1), 0, {}); + + auto filepath = temp_env->get_temp_filepath("EmptyListWithStruct.parquet"); + cudf::io::write_parquet(cudf::io::parquet_writer_options_builder(cudf::io::sink_info(filepath), + cudf::table_view({*L0}))); + auto result = cudf::io::read_parquet( + cudf::io::parquet_reader_options_builder(cudf::io::source_info(filepath))); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), *L0); +} + +TEST_F(ParquetWriterTest, CheckPageRows) +{ + auto sequence = thrust::make_counting_iterator(0); + auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); + + constexpr auto page_rows = 5000; + constexpr auto num_rows = 2 * page_rows; + column_wrapper col(sequence, sequence + num_rows, validity); + + auto expected = table_view{{col}}; + + auto const filepath = temp_env->get_temp_filepath("CheckPageRows.parquet"); + const cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .max_page_size_rows(page_rows); + cudf::io::write_parquet(out_opts); + + // check first page header and make sure it has only page_rows values + auto const source = cudf::io::datasource::create(filepath); + cudf::io::parquet::FileMetaData fmd; + + read_footer(source, &fmd); + CUDF_EXPECTS(fmd.row_groups.size() > 0, "No row groups found"); + CUDF_EXPECTS(fmd.row_groups[0].columns.size() == 1, "Invalid number of columns"); + auto const& first_chunk = fmd.row_groups[0].columns[0].meta_data; + CUDF_EXPECTS(first_chunk.data_page_offset > 0, "Invalid location for first data page"); + + // read first data page header. sizeof(PageHeader) is not exact, but the thrift encoded + // version should be smaller than size of the struct. 
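+  // (read_page_header presumably only needs an upper bound on how many bytes to fetch; the
+  // thrift compact decoder stops once the header has been fully parsed, so sizeof(PageHeader)
+  // acts as a safe over-estimate rather than an exact length.)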
+ auto const ph = read_page_header( + source, {first_chunk.data_page_offset, sizeof(cudf::io::parquet::PageHeader), 0}); + + EXPECT_EQ(ph.data_page_header.num_values, page_rows); +} + +TEST_F(ParquetWriterTest, Decimal128Stats) +{ + // check that decimal128 min and max statistics are written in network byte order + // this is negative, so should be the min + std::vector expected_min{ + 0xa1, 0xb2, 0xc3, 0xd4, 0xe5, 0xf6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + std::vector expected_max{ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xa1, 0xb2, 0xc3, 0xd4, 0xe5, 0xf6}; + + __int128_t val0 = 0xa1b2'c3d4'e5f6ULL; + __int128_t val1 = val0 << 80; + column_wrapper col0{{numeric::decimal128(val0, numeric::scale_type{0}), + numeric::decimal128(val1, numeric::scale_type{0})}}; + + auto expected = table_view{{col0}}; + + auto const filepath = temp_env->get_temp_filepath("Decimal128Stats.parquet"); + const cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected); + cudf::io::write_parquet(out_opts); + + auto const source = cudf::io::datasource::create(filepath); + cudf::io::parquet::FileMetaData fmd; + + read_footer(source, &fmd); + + auto const stats = parse_statistics(fmd.row_groups[0].columns[0]); + + EXPECT_EQ(expected_min, stats.min_value); + EXPECT_EQ(expected_max, stats.max_value); +} + +// ============================================================================= +// ---- test data for stats sort order tests +// need at least 3 pages, and min page count is 5000, so need at least 15000 values. +// use 20000 to be safe. +static constexpr int num_ordered_rows = 20000; +static constexpr int page_size_for_ordered_tests = 5000; + +namespace { +namespace testdata { +// ----- most numerics. scale by 100 so all values fit in a single byte + +template +std::enable_if_t && !std::is_same_v, + cudf::test::fixed_width_column_wrapper> +ascending() +{ + int start = std::is_signed_v ? -num_ordered_rows / 2 : 0; + auto elements = + cudf::detail::make_counting_transform_iterator(start, [](auto i) { return i / 100; }); + return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); +} + +template +std::enable_if_t && !std::is_same_v, + cudf::test::fixed_width_column_wrapper> +descending() +{ + if (std::is_signed_v) { + auto elements = cudf::detail::make_counting_transform_iterator(-num_ordered_rows / 2, + [](auto i) { return -i / 100; }); + return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); + } else { + auto elements = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return (num_ordered_rows - i) / 100; }); + return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); + } +} + +template +std::enable_if_t && !std::is_same_v, + cudf::test::fixed_width_column_wrapper> +unordered() +{ + if (std::is_signed_v) { + auto elements = cudf::detail::make_counting_transform_iterator( + -num_ordered_rows / 2, [](auto i) { return (i % 2 ? i : -i) / 100; }); + return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); + } else { + auto elements = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return (i % 2 ? 
i : num_ordered_rows - i) / 100; }); + return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); + } +} + +// ----- bool + +template +std::enable_if_t, cudf::test::fixed_width_column_wrapper> ascending() +{ + auto elements = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i < num_ordered_rows / 2 ? false : true; }); + return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); +} + +template +std::enable_if_t, cudf::test::fixed_width_column_wrapper> descending() +{ + auto elements = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i < num_ordered_rows / 2 ? true : false; }); + return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); +} + +template +std::enable_if_t, cudf::test::fixed_width_column_wrapper> unordered() +{ + auto elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { + switch (i / page_size_for_ordered_tests) { + case 0: return true; + case 1: return false; + case 2: return true; + default: return false; + } + }); + return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); +} + +// ----- fixed point types + +template +std::enable_if_t(), cudf::test::fixed_width_column_wrapper> ascending() +{ + auto elements = cudf::detail::make_counting_transform_iterator( + -num_ordered_rows / 2, [](auto i) { return T(i, numeric::scale_type{0}); }); + return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); +} + +template +std::enable_if_t(), cudf::test::fixed_width_column_wrapper> descending() +{ + auto elements = cudf::detail::make_counting_transform_iterator( + -num_ordered_rows / 2, [](auto i) { return T(-i, numeric::scale_type{0}); }); + return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); +} + +template +std::enable_if_t(), cudf::test::fixed_width_column_wrapper> unordered() +{ + auto elements = cudf::detail::make_counting_transform_iterator( + -num_ordered_rows / 2, [](auto i) { return T(i % 2 ? i : -i, numeric::scale_type{0}); }); + return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); +} + +// ----- chrono types +// ----- timstamp + +template +std::enable_if_t(), cudf::test::fixed_width_column_wrapper> ascending() +{ + auto elements = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return T(typename T::duration(i)); }); + return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); +} + +template +std::enable_if_t(), cudf::test::fixed_width_column_wrapper> descending() { - std::mt19937 gen(6542); - std::bernoulli_distribution bn(0.7f); - auto values = thrust::make_counting_iterator(0); + auto elements = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return T(typename T::duration(num_ordered_rows - i)); }); + return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); +} - constexpr cudf::size_type num_rows = 40000; - cudf::test::fixed_width_column_wrapper a(values, values + num_rows); - cudf::test::fixed_width_column_wrapper b(values, values + num_rows); +template +std::enable_if_t(), cudf::test::fixed_width_column_wrapper> unordered() +{ + auto elements = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return T(typename T::duration(i % 2 ? 
i : num_ordered_rows - i)); }); + return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); +} - cudf::table_view t({a, b}); - cudf::io::parquet_writer_options opts = cudf::io::parquet_writer_options::builder( - cudf::io::sink_info{"/tmp/chunked_splits.parquet"}, t); - cudf::io::write_parquet(opts); +// ----- duration - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{"/tmp/chunked_splits.parquet"}); - auto result = cudf::io::read_parquet(in_opts); - printf("\nResult size read all: %d\n\n", result.tbl->num_rows()); +template +std::enable_if_t(), cudf::test::fixed_width_column_wrapper> ascending() +{ + auto elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return T(i); }); + return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); } -#else -TEST_F(ParquetChunkedReaderTest, TestChunkedRead) +template +std::enable_if_t(), cudf::test::fixed_width_column_wrapper> descending() { - std::mt19937 gen(6542); - std::bernoulli_distribution bn(0.7f); - auto values = thrust::make_counting_iterator(0); + auto elements = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return T(num_ordered_rows - i); }); + return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); +} + +template +std::enable_if_t(), cudf::test::fixed_width_column_wrapper> unordered() +{ + auto elements = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return T(i % 2 ? i : num_ordered_rows - i); }); + return cudf::test::fixed_width_column_wrapper(elements, elements + num_ordered_rows); +} - constexpr cudf::size_type num_rows = 40000; - cudf::test::fixed_width_column_wrapper a(values, values + num_rows); - cudf::test::fixed_width_column_wrapper b(values, values + num_rows); +// ----- string_view - auto filepath = std::string{"/tmp/chunked_splits.parquet"}; - cudf::table_view t({a, b}); - cudf::io::parquet_writer_options opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, t); - cudf::io::write_parquet(opts); +template +std::enable_if_t, cudf::test::strings_column_wrapper> +ascending() +{ + char buf[10]; + auto elements = cudf::detail::make_counting_transform_iterator(0, [&buf](auto i) { + sprintf(buf, "%09d", i); + return std::string(buf); + }); + return cudf::test::strings_column_wrapper(elements, elements + num_ordered_rows); +} - //======================================================================================== - { - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(in_opts); - printf("Result size read full: %d\n\n\n\n\n", result.tbl->num_rows()); +template +std::enable_if_t, cudf::test::strings_column_wrapper> +descending() +{ + char buf[10]; + auto elements = cudf::detail::make_counting_transform_iterator(0, [&buf](auto i) { + sprintf(buf, "%09d", num_ordered_rows - i); + return std::string(buf); + }); + return cudf::test::strings_column_wrapper(elements, elements + num_ordered_rows); +} + +template +std::enable_if_t, cudf::test::strings_column_wrapper> +unordered() +{ + char buf[10]; + auto elements = cudf::detail::make_counting_transform_iterator(0, [&buf](auto i) { + sprintf(buf, "%09d", (i % 2 == 0) ? 
i : (num_ordered_rows - i)); + return std::string(buf); + }); + return cudf::test::strings_column_wrapper(elements, elements + num_ordered_rows); +} + +} // namespace testdata +} // anonymous namespace + +TYPED_TEST(ParquetWriterComparableTypeTest, ThreeColumnSorted) +{ + using T = TypeParam; + + auto col0 = testdata::ascending(); + auto col1 = testdata::descending(); + auto col2 = testdata::unordered(); + + auto const expected = table_view{{col0, col1, col2}}; + + auto const filepath = temp_env->get_temp_filepath("ThreeColumnSorted.parquet"); + const cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .max_page_size_rows(page_size_for_ordered_tests) + .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN); + cudf::io::write_parquet(out_opts); + + auto const source = cudf::io::datasource::create(filepath); + cudf::io::parquet::FileMetaData fmd; + + read_footer(source, &fmd); + CUDF_EXPECTS(fmd.row_groups.size() > 0, "No row groups found"); + + auto const& columns = fmd.row_groups[0].columns; + CUDF_EXPECTS(columns.size() == static_cast(expected.num_columns()), + "Invalid number of columns"); + + // now check that the boundary order for chunk 1 is ascending, + // chunk 2 is descending, and chunk 3 is unordered + cudf::io::parquet::BoundaryOrder expected_orders[] = { + cudf::io::parquet::BoundaryOrder::ASCENDING, + cudf::io::parquet::BoundaryOrder::DESCENDING, + cudf::io::parquet::BoundaryOrder::UNORDERED}; + + for (std::size_t i = 0; i < columns.size(); i++) { + auto const ci = read_column_index(source, columns[i]); + EXPECT_EQ(ci.boundary_order, expected_orders[i]); } +} + +// utility functions for column index tests + +// compare two values. return -1 if v1 < v2, +// 0 if v1 == v2, and 1 if v1 > v2. +template +int32_t compare(T& v1, T& v2) +{ + return (v1 > v2) - (v1 < v2); +} + +// compare two binary statistics blobs based on their physical +// and converted types. returns -1 if v1 < v2, 0 if v1 == v2, and +// 1 if v1 > v2. 
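+// (note: for the numeric physical types handled below, parquet stores min/max statistics as
+// plain-encoded little-endian values, so the reinterpret_casts are valid on a little-endian host.)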
+int32_t compare_binary(const std::vector& v1, + const std::vector& v2, + cudf::io::parquet::Type ptype, + cudf::io::parquet::ConvertedType ctype) +{ + switch (ptype) { + case cudf::io::parquet::INT32: + switch (ctype) { + case cudf::io::parquet::UINT_8: + case cudf::io::parquet::UINT_16: + case cudf::io::parquet::UINT_32: + return compare(*(reinterpret_cast(v1.data())), + *(reinterpret_cast(v2.data()))); + default: + return compare(*(reinterpret_cast(v1.data())), + *(reinterpret_cast(v2.data()))); + } + + case cudf::io::parquet::INT64: + if (ctype == cudf::io::parquet::UINT_64) { + return compare(*(reinterpret_cast(v1.data())), + *(reinterpret_cast(v2.data()))); + } + return compare(*(reinterpret_cast(v1.data())), + *(reinterpret_cast(v2.data()))); - cudf::io::chunked_parquet_reader_options in_opts = - cudf::io::chunked_parquet_reader_options::builder(cudf::io::source_info{filepath}); - in_opts.set_byte_limit(240000); + case cudf::io::parquet::FLOAT: + return compare(*(reinterpret_cast(v1.data())), + *(reinterpret_cast(v2.data()))); - cudf::io::chunked_parquet_reader reader(in_opts); + case cudf::io::parquet::DOUBLE: + return compare(*(reinterpret_cast(v1.data())), + *(reinterpret_cast(v2.data()))); - int count{0}; - while (reader.has_next()) { - printf("\n\nhas next %d\n\n", count++); + case cudf::io::parquet::BYTE_ARRAY: { + int32_t v1sz = v1.size(); + int32_t v2sz = v2.size(); + int32_t ret = memcmp(v1.data(), v2.data(), std::min(v1sz, v2sz)); + if (ret != 0 or v1sz == v2sz) { return ret; } + return v1sz - v2sz; + } - auto result = reader.read_chunk(); - printf("Result size: %d\n\n\n\n\n", result.tbl->num_rows()); + default: CUDF_FAIL("Invalid type in compare_binary"); } + + return 0; } -TEST_F(ParquetChunkedReaderTest, TestChunkedReadString) +TEST_F(ParquetWriterTest, CheckColumnOffsetIndex) { - // values the cudf parquet writer uses - // constexpr size_t default_max_page_size_bytes = 512 * 1024; ///< 512KB per page - // constexpr size_type default_max_page_size_rows = 20000; ///< 20k rows per page - std::mt19937 gen(6542); - std::bernoulli_distribution bn(0.7f); - auto values = thrust::make_counting_iterator(0); - constexpr cudf::size_type num_rows = 60000; - // ints Page total bytes cumulative bytes - // 20000 rows of 4 bytes each = A0 80000 80000 - // 20000 rows of 4 bytes each = A1 80000 160000 - // 20000 rows of 4 bytes each = A2 80000 240000 - cudf::test::fixed_width_column_wrapper a(values, values + num_rows); - // strings Page total bytes cumulative bytes - // 20000 rows of 1 char each (20000 + 80004) = B0 100004 100004 - // 20000 rows of 4 chars each (80000 + 80004) = B1 160004 260008 - // 20000 rows of 16 chars each (320000 + 80004) = B2 400004 660012 - std::vector strings{"a", "bbbb", "cccccccccccccccc"}; - auto const str_iter = cudf::detail::make_counting_transform_iterator(0, [&](int i) { - if (i < 20000) { return strings[0]; } - if (i < 40000) { return strings[1]; } - return strings[2]; - }); - cudf::test::strings_column_wrapper b{str_iter, str_iter + num_rows}; - // cumulative sizes - // A0 + B0 : 180004 - // A1 + B1 : 420008 - // A2 + B2 : 900012 - // skip_rows / num_rows - // chunked_read_size of 500000 should give 2 chunks: {0, 40000}, {40000, 20000} - // chunked_read_size of 1000000 should give 1 chunks: {0, 60000}, - auto write_tbl = cudf::table_view{{a, b}}; - auto filepath = std::string{"/tmp/chunked_splits_strings.parquet"}; - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, write_tbl); - 
cudf::io::write_parquet(out_opts); - //======================================================================================== - - { - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(in_opts); - printf("Result size read full: %d\n\n\n\n\n", result.tbl->num_rows()); + constexpr auto num_rows = 100000; + + // fixed length strings + auto str1_elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { + char buf[30]; + sprintf(buf, "%012d", i); + return std::string(buf); + }); + auto col0 = cudf::test::strings_column_wrapper(str1_elements, str1_elements + num_rows); + + auto col1_data = random_values(num_rows); + auto col2_data = random_values(num_rows); + auto col3_data = random_values(num_rows); + auto col4_data = random_values(num_rows); + auto col5_data = random_values(num_rows); + auto col6_data = random_values(num_rows); + + auto col1 = cudf::test::fixed_width_column_wrapper(col1_data.begin(), col1_data.end()); + auto col2 = cudf::test::fixed_width_column_wrapper(col2_data.begin(), col2_data.end()); + auto col3 = cudf::test::fixed_width_column_wrapper(col3_data.begin(), col3_data.end()); + auto col4 = cudf::test::fixed_width_column_wrapper(col4_data.begin(), col4_data.end()); + auto col5 = cudf::test::fixed_width_column_wrapper(col5_data.begin(), col5_data.end()); + auto col6 = cudf::test::fixed_width_column_wrapper(col6_data.begin(), col6_data.end()); + + // mixed length strings + auto str2_elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { + char buf[30]; + sprintf(buf, "%d", i); + return std::string(buf); + }); + auto col7 = cudf::test::strings_column_wrapper(str2_elements, str2_elements + num_rows); + + auto const expected = table_view{{col0, col1, col2, col3, col4, col5, col6, col7}}; + + auto const filepath = temp_env->get_temp_filepath("CheckColumnOffsetIndex.parquet"); + const cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) + .max_page_size_rows(20000); + cudf::io::write_parquet(out_opts); + + auto const source = cudf::io::datasource::create(filepath); + cudf::io::parquet::FileMetaData fmd; + + read_footer(source, &fmd); + + for (size_t r = 0; r < fmd.row_groups.size(); r++) { + auto const& rg = fmd.row_groups[r]; + for (size_t c = 0; c < rg.columns.size(); c++) { + auto const& chunk = rg.columns[c]; + + // loop over offsets, read each page header, make sure it's a data page and that + // the first row index is correct + auto const oi = read_offset_index(source, chunk); + + int64_t num_vals = 0; + for (size_t o = 0; o < oi.page_locations.size(); o++) { + auto const& page_loc = oi.page_locations[o]; + auto const ph = read_page_header(source, page_loc); + EXPECT_EQ(ph.type, cudf::io::parquet::PageType::DATA_PAGE); + EXPECT_EQ(page_loc.first_row_index, num_vals); + num_vals += ph.data_page_header.num_values; + } + + // loop over page stats from the column index. check that stats.min <= page.min + // and stats.max >= page.max for each page. 
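+      // the chunk-level statistics aggregate every page in the chunk, so the chunk minimum
+      // can never exceed a page minimum and the chunk maximum can never be below a page maximum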
+ auto const ci = read_column_index(source, chunk); + auto const stats = parse_statistics(chunk); + + // schema indexing starts at 1 + auto const ptype = fmd.schema[c + 1].type; + auto const ctype = fmd.schema[c + 1].converted_type; + for (size_t p = 0; p < ci.min_values.size(); p++) { + // null_pages should always be false + EXPECT_FALSE(ci.null_pages[p]); + // null_counts should always be 0 + EXPECT_EQ(ci.null_counts[p], 0); + EXPECT_TRUE(compare_binary(stats.min_value, ci.min_values[p], ptype, ctype) <= 0); + } + for (size_t p = 0; p < ci.max_values.size(); p++) + EXPECT_TRUE(compare_binary(stats.max_value, ci.max_values[p], ptype, ctype) >= 0); + } + } +} + +TEST_F(ParquetWriterTest, CheckColumnOffsetIndexNulls) +{ + constexpr auto num_rows = 100000; + + // fixed length strings + auto str1_elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { + char buf[30]; + sprintf(buf, "%012d", i); + return std::string(buf); + }); + auto col0 = cudf::test::strings_column_wrapper(str1_elements, str1_elements + num_rows); + + auto col1_data = random_values(num_rows); + auto col2_data = random_values(num_rows); + auto col3_data = random_values(num_rows); + auto col4_data = random_values(num_rows); + auto col5_data = random_values(num_rows); + auto col6_data = random_values(num_rows); + + auto valids = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; }); + + // add null values for all but first column + auto col1 = + cudf::test::fixed_width_column_wrapper(col1_data.begin(), col1_data.end(), valids); + auto col2 = + cudf::test::fixed_width_column_wrapper(col2_data.begin(), col2_data.end(), valids); + auto col3 = + cudf::test::fixed_width_column_wrapper(col3_data.begin(), col3_data.end(), valids); + auto col4 = + cudf::test::fixed_width_column_wrapper(col4_data.begin(), col4_data.end(), valids); + auto col5 = + cudf::test::fixed_width_column_wrapper(col5_data.begin(), col5_data.end(), valids); + auto col6 = + cudf::test::fixed_width_column_wrapper(col6_data.begin(), col6_data.end(), valids); + + // mixed length strings + auto str2_elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { + char buf[30]; + sprintf(buf, "%d", i); + return std::string(buf); + }); + auto col7 = cudf::test::strings_column_wrapper(str2_elements, str2_elements + num_rows, valids); + + auto expected = table_view{{col0, col1, col2, col3, col4, col5, col6, col7}}; + + auto const filepath = temp_env->get_temp_filepath("CheckColumnOffsetIndexNulls.parquet"); + const cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) + .max_page_size_rows(20000); + cudf::io::write_parquet(out_opts); + + auto const source = cudf::io::datasource::create(filepath); + cudf::io::parquet::FileMetaData fmd; + + read_footer(source, &fmd); + + for (size_t r = 0; r < fmd.row_groups.size(); r++) { + auto const& rg = fmd.row_groups[r]; + for (size_t c = 0; c < rg.columns.size(); c++) { + auto const& chunk = rg.columns[c]; + + // loop over offsets, read each page header, make sure it's a data page and that + // the first row index is correct + auto const oi = read_offset_index(source, chunk); + + int64_t num_vals = 0; + for (size_t o = 0; o < oi.page_locations.size(); o++) { + auto const& page_loc = oi.page_locations[o]; + auto const ph = read_page_header(source, page_loc); + EXPECT_EQ(ph.type, cudf::io::parquet::PageType::DATA_PAGE); + 
EXPECT_EQ(page_loc.first_row_index, num_vals); + num_vals += ph.data_page_header.num_values; + } + + // loop over page stats from the column index. check that stats.min <= page.min + // and stats.max >= page.max for each page. + auto const ci = read_column_index(source, chunk); + auto const stats = parse_statistics(chunk); + + // schema indexing starts at 1 + auto const ptype = fmd.schema[c + 1].type; + auto const ctype = fmd.schema[c + 1].converted_type; + for (size_t p = 0; p < ci.min_values.size(); p++) { + EXPECT_FALSE(ci.null_pages[p]); + if (c > 0) { // first column has no nulls + EXPECT_GT(ci.null_counts[p], 0); + } else { + EXPECT_EQ(ci.null_counts[p], 0); + } + EXPECT_TRUE(compare_binary(stats.min_value, ci.min_values[p], ptype, ctype) <= 0); + } + for (size_t p = 0; p < ci.max_values.size(); p++) { + EXPECT_TRUE(compare_binary(stats.max_value, ci.max_values[p], ptype, ctype) >= 0); + } + } + } +} + +TEST_F(ParquetWriterTest, CheckColumnOffsetIndexNullColumn) +{ + constexpr auto num_rows = 100000; + + // fixed length strings + auto str1_elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { + char buf[30]; + sprintf(buf, "%012d", i); + return std::string(buf); + }); + auto col0 = cudf::test::strings_column_wrapper(str1_elements, str1_elements + num_rows); + + auto col1_data = random_values(num_rows); + auto col2_data = random_values(num_rows); + + // col1 is all nulls + auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return false; }); + auto col1 = + cudf::test::fixed_width_column_wrapper(col1_data.begin(), col1_data.end(), valids); + auto col2 = cudf::test::fixed_width_column_wrapper(col2_data.begin(), col2_data.end()); + + // mixed length strings + auto str2_elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { + char buf[30]; + sprintf(buf, "%d", i); + return std::string(buf); + }); + auto col3 = cudf::test::strings_column_wrapper(str2_elements, str2_elements + num_rows); + + auto expected = table_view{{col0, col1, col2, col3}}; + + auto const filepath = temp_env->get_temp_filepath("CheckColumnOffsetIndexNullColumn.parquet"); + const cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) + .max_page_size_rows(20000); + cudf::io::write_parquet(out_opts); + + auto const source = cudf::io::datasource::create(filepath); + cudf::io::parquet::FileMetaData fmd; + + read_footer(source, &fmd); + + for (size_t r = 0; r < fmd.row_groups.size(); r++) { + auto const& rg = fmd.row_groups[r]; + for (size_t c = 0; c < rg.columns.size(); c++) { + auto const& chunk = rg.columns[c]; + + // loop over offsets, read each page header, make sure it's a data page and that + // the first row index is correct + auto const oi = read_offset_index(source, chunk); + + int64_t num_vals = 0; + for (size_t o = 0; o < oi.page_locations.size(); o++) { + auto const& page_loc = oi.page_locations[o]; + auto const ph = read_page_header(source, page_loc); + EXPECT_EQ(ph.type, cudf::io::parquet::PageType::DATA_PAGE); + EXPECT_EQ(page_loc.first_row_index, num_vals); + num_vals += ph.data_page_header.num_values; + } + + // loop over page stats from the column index. check that stats.min <= page.min + // and stats.max >= page.max for each non-empty page. 
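+      // pages that contain only nulls carry no min/max statistics, so the comparisons
+      // below are only performed for pages that are not flagged as null pages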
+ auto const ci = read_column_index(source, chunk); + auto const stats = parse_statistics(chunk); + + // schema indexing starts at 1 + auto const ptype = fmd.schema[c + 1].type; + auto const ctype = fmd.schema[c + 1].converted_type; + for (size_t p = 0; p < ci.min_values.size(); p++) { + // check tnat null_pages is true for column 1 + if (c == 1) { + EXPECT_TRUE(ci.null_pages[p]); + EXPECT_GT(ci.null_counts[p], 0); + } + if (not ci.null_pages[p]) { + EXPECT_EQ(ci.null_counts[p], 0); + EXPECT_TRUE(compare_binary(stats.min_value, ci.min_values[p], ptype, ctype) <= 0); + } + } + for (size_t p = 0; p < ci.max_values.size(); p++) { + if (not ci.null_pages[p]) { + EXPECT_TRUE(compare_binary(stats.max_value, ci.max_values[p], ptype, ctype) >= 0); + } + } + } + } +} + +TEST_F(ParquetWriterTest, CheckColumnOffsetIndexStruct) +{ + auto c0 = testdata::ascending(); + + auto sc0 = testdata::ascending(); + auto sc1 = testdata::descending(); + auto sc2 = testdata::unordered(); + + std::vector> struct_children; + struct_children.push_back(sc0.release()); + struct_children.push_back(sc1.release()); + struct_children.push_back(sc2.release()); + cudf::test::structs_column_wrapper c1(std::move(struct_children)); + + auto listgen = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i % 2 == 0 ? i / 2 : num_ordered_rows - (i / 2); }); + auto list = + cudf::test::fixed_width_column_wrapper(listgen, listgen + 2 * num_ordered_rows); + auto offgen = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i * 2; }); + auto offsets = + cudf::test::fixed_width_column_wrapper(offgen, offgen + num_ordered_rows + 1); + + auto c2 = cudf::make_lists_column(num_ordered_rows, offsets.release(), list.release(), 0, {}); + + table_view expected({c0, c1, *c2}); + + auto const filepath = temp_env->get_temp_filepath("CheckColumnOffsetIndexStruct.parquet"); + const cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) + .max_page_size_rows(page_size_for_ordered_tests); + cudf::io::write_parquet(out_opts); + + auto const source = cudf::io::datasource::create(filepath); + cudf::io::parquet::FileMetaData fmd; + + read_footer(source, &fmd); + + // hard coded schema indices. + // TODO find a way to do this without magic + size_t colidxs[] = {1, 3, 4, 5, 8}; + for (size_t r = 0; r < fmd.row_groups.size(); r++) { + auto const& rg = fmd.row_groups[r]; + for (size_t c = 0; c < rg.columns.size(); c++) { + size_t colidx = colidxs[c]; + auto const& chunk = rg.columns[c]; + + // loop over offsets, read each page header, make sure it's a data page and that + // the first row index is correct + auto const oi = read_offset_index(source, chunk); + + int64_t num_vals = 0; + for (size_t o = 0; o < oi.page_locations.size(); o++) { + auto const& page_loc = oi.page_locations[o]; + auto const ph = read_page_header(source, page_loc); + EXPECT_EQ(ph.type, cudf::io::parquet::PageType::DATA_PAGE); + // last column has 2 values per row + EXPECT_EQ(page_loc.first_row_index * (c == rg.columns.size() - 1 ? 2 : 1), num_vals); + num_vals += ph.data_page_header.num_values; + } + + // loop over page stats from the column index. check that stats.min <= page.min + // and stats.max >= page.max for each page. 
+ auto const ci = read_column_index(source, chunk); + auto const stats = parse_statistics(chunk); + + auto const ptype = fmd.schema[colidx].type; + auto const ctype = fmd.schema[colidx].converted_type; + for (size_t p = 0; p < ci.min_values.size(); p++) { + EXPECT_TRUE(compare_binary(stats.min_value, ci.min_values[p], ptype, ctype) <= 0); + } + for (size_t p = 0; p < ci.max_values.size(); p++) { + EXPECT_TRUE(compare_binary(stats.max_value, ci.max_values[p], ptype, ctype) >= 0); + } + } + } +} + +TEST_F(ParquetWriterTest, CheckColumnIndexTruncation) +{ + const char* coldata[] = { + // in-range 7 bit. should truncate to "yyyyyyyz" + "yyyyyyyyy", + // max 7 bit. should truncate to "x7fx7fx7fx7fx7fx7fx7fx80", since it's + // considered binary, not UTF-8. If UTF-8 it should not truncate. + "\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f", + // max binary. this should not truncate + "\xff\xff\xff\xff\xff\xff\xff\xff\xff", + // in-range 2-byte UTF8 (U+00E9). should truncate to "éééê" + "ééééé", + // max 2-byte UTF8 (U+07FF). should not truncate + "߿߿߿߿߿", + // in-range 3-byte UTF8 (U+0800). should truncate to "ࠀࠁ" + "ࠀࠀࠀ", + // max 3-byte UTF8 (U+FFFF). should not truncate + "\xef\xbf\xbf\xef\xbf\xbf\xef\xbf\xbf", + // in-range 4-byte UTF8 (U+10000). should truncate to "𐀀𐀁" + "𐀀𐀀𐀀", + // max unicode (U+10FFFF). should truncate to \xf4\x8f\xbf\xbf\xf4\x90\x80\x80, + // which is no longer valid unicode, but is still ok UTF-8??? + "\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbf", + // max 4-byte UTF8 (U+1FFFFF). should not truncate + "\xf7\xbf\xbf\xbf\xf7\xbf\xbf\xbf\xf7\xbf\xbf\xbf"}; + + // NOTE: UTF8 min is initialized with 0xf7bfbfbf. Binary values larger + // than that will not become minimum value (when written as UTF-8). + const char* truncated_min[] = {"yyyyyyyy", + "\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f", + "\xf7\xbf\xbf\xbf", + "éééé", + "߿߿߿߿", + "ࠀࠀ", + "\xef\xbf\xbf\xef\xbf\xbf", + "𐀀𐀀", + "\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbf", + "\xf7\xbf\xbf\xbf"}; + + const char* truncated_max[] = {"yyyyyyyz", + "\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x80", + "\xff\xff\xff\xff\xff\xff\xff\xff\xff", + "éééê", + "߿߿߿߿߿", + "ࠀࠁ", + "\xef\xbf\xbf\xef\xbf\xbf\xef\xbf\xbf", + "𐀀𐀁", + "\xf4\x8f\xbf\xbf\xf4\x90\x80\x80", + "\xf7\xbf\xbf\xbf\xf7\xbf\xbf\xbf\xf7\xbf\xbf\xbf"}; + + auto cols = [&]() { + using string_wrapper = column_wrapper; + std::vector> cols; + for (auto const str : coldata) { + cols.push_back(string_wrapper{str}.release()); + } + return cols; + }(); + auto expected = std::make_unique
(std::move(cols)); + + auto const filepath = temp_env->get_temp_filepath("CheckColumnIndexTruncation.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected->view()) + .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) + .column_index_truncate_length(8); + cudf::io::write_parquet(out_opts); + + auto const source = cudf::io::datasource::create(filepath); + cudf::io::parquet::FileMetaData fmd; + + read_footer(source, &fmd); + + for (size_t r = 0; r < fmd.row_groups.size(); r++) { + auto const& rg = fmd.row_groups[r]; + for (size_t c = 0; c < rg.columns.size(); c++) { + auto const& chunk = rg.columns[c]; + + auto const ci = read_column_index(source, chunk); + auto const stats = parse_statistics(chunk); + + // check trunc(page.min) <= stats.min && trun(page.max) >= stats.max + auto const ptype = fmd.schema[c + 1].type; + auto const ctype = fmd.schema[c + 1].converted_type; + EXPECT_TRUE(compare_binary(ci.min_values[0], stats.min_value, ptype, ctype) <= 0); + EXPECT_TRUE(compare_binary(ci.max_values[0], stats.max_value, ptype, ctype) >= 0); + + // check that truncated values == expected + EXPECT_EQ(memcmp(ci.min_values[0].data(), truncated_min[c], ci.min_values[0].size()), 0); + EXPECT_EQ(memcmp(ci.max_values[0].data(), truncated_max[c], ci.max_values[0].size()), 0); + } } +} + +TEST_F(ParquetWriterTest, BinaryColumnIndexTruncation) +{ + std::vector truncated_min[] = {{0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe}, + {0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}; + + std::vector truncated_max[] = {{0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xff}, + {0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}; + + cudf::test::lists_column_wrapper col0{ + {0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe}}; + cudf::test::lists_column_wrapper col1{ + {0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}; + cudf::test::lists_column_wrapper col2{ + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}; - cudf::io::chunked_parquet_reader_options in_opts = - cudf::io::chunked_parquet_reader_options::builder(cudf::io::source_info{filepath}); - in_opts.set_byte_limit(500000); + auto expected = table_view{{col0, col1, col2}}; - cudf::io::chunked_parquet_reader reader(in_opts); + cudf::io::table_input_metadata output_metadata(expected); + output_metadata.column_metadata[0].set_name("col_binary0").set_output_as_binary(true); + output_metadata.column_metadata[1].set_name("col_binary1").set_output_as_binary(true); + output_metadata.column_metadata[2].set_name("col_binary2").set_output_as_binary(true); - int count{0}; - while (reader.has_next()) { - printf("\n\nhas next %d\n\n", count++); + auto const filepath = temp_env->get_temp_filepath("BinaryColumnIndexTruncation.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .metadata(&output_metadata) + .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) + .column_index_truncate_length(8); + cudf::io::write_parquet(out_opts); + + auto const source = cudf::io::datasource::create(filepath); + cudf::io::parquet::FileMetaData fmd; + + read_footer(source, &fmd); - auto result = reader.read_chunk(); - printf("Result size: %d\n\n\n\n\n", result.tbl->num_rows()); + for (size_t r = 0; r < fmd.row_groups.size(); r++) { + auto const& rg = fmd.row_groups[r]; + for (size_t c = 0; c < rg.columns.size(); c++) { + auto 
const& chunk = rg.columns[c]; + + auto const ci = read_column_index(source, chunk); + auto const stats = parse_statistics(chunk); + + // check trunc(page.min) <= stats.min && trun(page.max) >= stats.max + auto const ptype = fmd.schema[c + 1].type; + auto const ctype = fmd.schema[c + 1].converted_type; + EXPECT_TRUE(compare_binary(ci.min_values[0], stats.min_value, ptype, ctype) <= 0); + EXPECT_TRUE(compare_binary(ci.max_values[0], stats.max_value, ptype, ctype) >= 0); + + // check that truncated values == expected + EXPECT_EQ(ci.min_values[0], truncated_min[c]); + EXPECT_EQ(ci.max_values[0], truncated_max[c]); + } } } -#endif + +TEST_F(ParquetReaderTest, EmptyColumnsParam) +{ + srand(31337); + auto const expected = create_random_fixed_table(2, 4, false); + + std::vector out_buffer; + cudf::io::parquet_writer_options args = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&out_buffer}, *expected); + cudf::io::write_parquet(args); + + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder( + cudf::io::source_info{out_buffer.data(), out_buffer.size()}) + .columns({}); + auto const result = cudf::io::read_parquet(read_opts); + + EXPECT_EQ(result.tbl->num_columns(), 0); + EXPECT_EQ(result.tbl->num_rows(), 0); +} + +TEST_F(ParquetReaderTest, BinaryAsStrings) +{ + std::vector strings{ + "Monday", "Wednesday", "Friday", "Monday", "Friday", "Friday", "Friday", "Funday"}; + const auto num_rows = strings.size(); + + auto seq_col0 = random_values(num_rows); + auto seq_col2 = random_values(num_rows); + auto seq_col3 = random_values(num_rows); + auto validity = cudf::test::iterators::no_nulls(); + + column_wrapper int_col{seq_col0.begin(), seq_col0.end(), validity}; + column_wrapper string_col{strings.begin(), strings.end()}; + column_wrapper float_col{seq_col2.begin(), seq_col2.end(), validity}; + cudf::test::lists_column_wrapper list_int_col{ + {'M', 'o', 'n', 'd', 'a', 'y'}, + {'W', 'e', 'd', 'n', 'e', 's', 'd', 'a', 'y'}, + {'F', 'r', 'i', 'd', 'a', 'y'}, + {'M', 'o', 'n', 'd', 'a', 'y'}, + {'F', 'r', 'i', 'd', 'a', 'y'}, + {'F', 'r', 'i', 'd', 'a', 'y'}, + {'F', 'r', 'i', 'd', 'a', 'y'}, + {'F', 'u', 'n', 'd', 'a', 'y'}}; + + auto output = table_view{{int_col, string_col, float_col, string_col, list_int_col}}; + cudf::io::table_input_metadata output_metadata(output); + output_metadata.column_metadata[0].set_name("col_other"); + output_metadata.column_metadata[1].set_name("col_string"); + output_metadata.column_metadata[2].set_name("col_float"); + output_metadata.column_metadata[3].set_name("col_string2").set_output_as_binary(true); + output_metadata.column_metadata[4].set_name("col_binary").set_output_as_binary(true); + + auto filepath = temp_env->get_temp_filepath("BinaryReadStrings.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, output) + .metadata(&output_metadata); + cudf::io::write_parquet(out_opts); + + auto expected_string = table_view{{int_col, string_col, float_col, string_col, string_col}}; + auto expected_mixed = table_view{{int_col, string_col, float_col, list_int_col, list_int_col}}; + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .set_column_schema({{}, {}, {}, {}, {}}); + auto result = cudf::io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_string, result.tbl->view()); + + cudf::io::parquet_reader_options default_in_opts = + 
cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + result = cudf::io::read_parquet(default_in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_string, result.tbl->view()); + + std::vector md{ + {}, + {}, + {}, + cudf::io::reader_column_schema().set_convert_binary_to_strings(false), + cudf::io::reader_column_schema().set_convert_binary_to_strings(false)}; + + cudf::io::parquet_reader_options mixed_in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .set_column_schema(md); + result = cudf::io::read_parquet(mixed_in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_mixed, result.tbl->view()); +} + +TEST_F(ParquetReaderTest, NestedByteArray) +{ + constexpr auto num_rows = 8; + + auto seq_col0 = random_values(num_rows); + auto seq_col2 = random_values(num_rows); + auto seq_col3 = random_values(num_rows); + auto const validity = cudf::test::iterators::no_nulls(); + + column_wrapper int_col{seq_col0.begin(), seq_col0.end(), validity}; + column_wrapper float_col{seq_col2.begin(), seq_col2.end(), validity}; + cudf::test::lists_column_wrapper list_list_int_col{ + {{'M', 'o', 'n', 'd', 'a', 'y'}, + {'W', 'e', 'd', 'n', 'e', 's', 'd', 'a', 'y'}, + {'F', 'r', 'i', 'd', 'a', 'y'}}, + {{'M', 'o', 'n', 'd', 'a', 'y'}, {'F', 'r', 'i', 'd', 'a', 'y'}}, + {{'M', 'o', 'n', 'd', 'a', 'y'}, + {'W', 'e', 'd', 'n', 'e', 's', 'd', 'a', 'y'}, + {'F', 'r', 'i', 'd', 'a', 'y'}}, + {{'F', 'r', 'i', 'd', 'a', 'y'}, + {'F', 'r', 'i', 'd', 'a', 'y'}, + {'F', 'u', 'n', 'd', 'a', 'y'}}, + {{'M', 'o', 'n', 'd', 'a', 'y'}, + {'W', 'e', 'd', 'n', 'e', 's', 'd', 'a', 'y'}, + {'F', 'r', 'i', 'd', 'a', 'y'}}, + {{'F', 'r', 'i', 'd', 'a', 'y'}, + {'F', 'r', 'i', 'd', 'a', 'y'}, + {'F', 'u', 'n', 'd', 'a', 'y'}}, + {{'M', 'o', 'n', 'd', 'a', 'y'}, + {'W', 'e', 'd', 'n', 'e', 's', 'd', 'a', 'y'}, + {'F', 'r', 'i', 'd', 'a', 'y'}}, + {{'M', 'o', 'n', 'd', 'a', 'y'}, {'F', 'r', 'i', 'd', 'a', 'y'}}}; + + auto const expected = table_view{{int_col, float_col, list_list_int_col}}; + cudf::io::table_input_metadata output_metadata(expected); + output_metadata.column_metadata[0].set_name("col_other"); + output_metadata.column_metadata[1].set_name("col_float"); + output_metadata.column_metadata[2].set_name("col_binary").child(1).set_output_as_binary(true); + + auto filepath = temp_env->get_temp_filepath("NestedByteArray.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .metadata(&output_metadata); + cudf::io::write_parquet(out_opts); + + auto source = cudf::io::datasource::create(filepath); + cudf::io::parquet::FileMetaData fmd; + + read_footer(source, &fmd); + EXPECT_EQ(fmd.schema[5].type, cudf::io::parquet::Type::BYTE_ARRAY); + + std::vector md{ + {}, + {}, + cudf::io::reader_column_schema().add_child( + cudf::io::reader_column_schema().set_convert_binary_to_strings(false))}; + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .set_column_schema(md); + auto result = cudf::io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); +} + +TEST_F(ParquetWriterTest, ByteArrayStats) +{ + // check that byte array min and max statistics are written as expected. If a byte array is + // written as a string, max utf8 is 0xf7bfbfbf and so the minimum value will be set to that value + // instead of a potential minimum higher than that. 
+ std::vector expected_col0_min{0xf0}; + std::vector expected_col0_max{0xf0, 0xf5, 0xf5}; + std::vector expected_col1_min{0xfe, 0xfe, 0xfe}; + std::vector expected_col1_max{0xfe, 0xfe, 0xfe}; + + cudf::test::lists_column_wrapper list_int_col0{ + {0xf0}, {0xf0, 0xf5, 0xf3}, {0xf0, 0xf5, 0xf5}}; + cudf::test::lists_column_wrapper list_int_col1{ + {0xfe, 0xfe, 0xfe}, {0xfe, 0xfe, 0xfe}, {0xfe, 0xfe, 0xfe}}; + + auto expected = table_view{{list_int_col0, list_int_col1}}; + cudf::io::table_input_metadata output_metadata(expected); + output_metadata.column_metadata[0].set_name("col_binary0").set_output_as_binary(true); + output_metadata.column_metadata[1].set_name("col_binary1").set_output_as_binary(true); + + auto filepath = temp_env->get_temp_filepath("ByteArrayStats.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .metadata(&output_metadata); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .set_column_schema({{}, {}}); + auto result = cudf::io::read_parquet(in_opts); + + auto source = cudf::io::datasource::create(filepath); + cudf::io::parquet::FileMetaData fmd; + + read_footer(source, &fmd); + + EXPECT_EQ(fmd.schema[1].type, cudf::io::parquet::Type::BYTE_ARRAY); + EXPECT_EQ(fmd.schema[2].type, cudf::io::parquet::Type::BYTE_ARRAY); + + auto const stats0 = parse_statistics(fmd.row_groups[0].columns[0]); + auto const stats1 = parse_statistics(fmd.row_groups[0].columns[1]); + + EXPECT_EQ(expected_col0_min, stats0.min_value); + EXPECT_EQ(expected_col0_max, stats0.max_value); + EXPECT_EQ(expected_col1_min, stats1.min_value); + EXPECT_EQ(expected_col1_max, stats1.max_value); +} + +TEST_F(ParquetReaderTest, StructByteArray) +{ + constexpr auto num_rows = 100; + + auto seq_col0 = random_values(num_rows); + auto const validity = cudf::test::iterators::no_nulls(); + + column_wrapper int_col{seq_col0.begin(), seq_col0.end(), validity}; + cudf::test::lists_column_wrapper list_of_int{{seq_col0.begin(), seq_col0.begin() + 50}, + {seq_col0.begin() + 50, seq_col0.end()}}; + auto struct_col = cudf::test::structs_column_wrapper{{list_of_int}, validity}; + + auto const expected = table_view{{struct_col}}; + EXPECT_EQ(1, expected.num_columns()); + cudf::io::table_input_metadata output_metadata(expected); + output_metadata.column_metadata[0] + .set_name("struct_binary") + .child(0) + .set_name("a") + .set_output_as_binary(true); + + auto filepath = temp_env->get_temp_filepath("StructByteArray.parquet"); + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .metadata(&output_metadata); + cudf::io::write_parquet(out_opts); + + std::vector md{cudf::io::reader_column_schema().add_child( + cudf::io::reader_column_schema().set_convert_binary_to_strings(false))}; + + cudf::io::parquet_reader_options in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}) + .set_column_schema(md); + auto result = cudf::io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); +} + +TEST_F(ParquetWriterTest, SingleValueDictionaryTest) +{ + constexpr unsigned int expected_bits = 1; + constexpr unsigned int nrows = 1'000'000U; + + auto elements = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return "a unique string value suffixed with 1"; }); + auto const col0 = 
cudf::test::strings_column_wrapper(elements, elements + nrows); + auto const expected = table_view{{col0}}; + + auto const filepath = temp_env->get_temp_filepath("SingleValueDictionaryTest.parquet"); + // set row group size so that there will be only one row group + // no compression so we can easily read page data + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .compression(cudf::io::compression_type::NONE) + .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) + .row_group_size_rows(nrows); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options default_in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto const result = cudf::io::read_parquet(default_in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); + + // make sure dictionary was used + auto const source = cudf::io::datasource::create(filepath); + cudf::io::parquet::FileMetaData fmd; + + read_footer(source, &fmd); + auto used_dict = [&fmd]() { + for (auto enc : fmd.row_groups[0].columns[0].meta_data.encodings) { + if (enc == cudf::io::parquet::Encoding::PLAIN_DICTIONARY or + enc == cudf::io::parquet::Encoding::RLE_DICTIONARY) { + return true; + } + } + return false; + }; + EXPECT_TRUE(used_dict()); + + // and check that the correct number of bits was used + auto const oi = read_offset_index(source, fmd.row_groups[0].columns[0]); + auto const nbits = read_dict_bits(source, oi.page_locations[0]); + EXPECT_EQ(nbits, expected_bits); +} + +TEST_P(ParquetSizedTest, DictionaryTest) +{ + const unsigned int cardinality = (1 << (GetParam() - 1)) + 1; + const unsigned int nrows = std::max(cardinality * 3 / 2, 3'000'000U); + + auto elements = cudf::detail::make_counting_transform_iterator(0, [cardinality](auto i) { + return "a unique string value suffixed with " + std::to_string(i % cardinality); + }); + auto const col0 = cudf::test::strings_column_wrapper(elements, elements + nrows); + auto const expected = table_view{{col0}}; + + auto const filepath = temp_env->get_temp_filepath("DictionaryTest.parquet"); + // set row group size so that there will be only one row group + // no compression so we can easily read page data + cudf::io::parquet_writer_options out_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) + .compression(cudf::io::compression_type::NONE) + .stats_level(cudf::io::statistics_freq::STATISTICS_COLUMN) + .row_group_size_rows(nrows) + .row_group_size_bytes(512 * 1024 * 1024); + cudf::io::write_parquet(out_opts); + + cudf::io::parquet_reader_options default_in_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); + auto const result = cudf::io::read_parquet(default_in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); + + // make sure dictionary was used + auto const source = cudf::io::datasource::create(filepath); + cudf::io::parquet::FileMetaData fmd; + + read_footer(source, &fmd); + auto used_dict = [&fmd]() { + for (auto enc : fmd.row_groups[0].columns[0].meta_data.encodings) { + if (enc == cudf::io::parquet::Encoding::PLAIN_DICTIONARY or + enc == cudf::io::parquet::Encoding::RLE_DICTIONARY) { + return true; + } + } + return false; + }; + EXPECT_TRUE(used_dict()); + + // and check that the correct number of bits was used + auto const oi = read_offset_index(source, fmd.row_groups[0].columns[0]); + auto const nbits = read_dict_bits(source, oi.page_locations[0]); + 
EXPECT_EQ(nbits, GetParam()); +} + +CUDF_TEST_PROGRAM_MAIN() From 81097ebebdd1bbf30c8048a4707c437cc19ace73 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 19 Oct 2022 10:20:29 -0700 Subject: [PATCH 058/162] Modify `read` to add exception and preprocess once Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.cu | 68 ++++++++++++++++++------------- 1 file changed, 39 insertions(+), 29 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index ea1e53bb879..6766e918854 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1561,6 +1561,8 @@ std::pair reader::impl::preprocess_file( size_type num_rows, const std::vector>& row_group_list) { + printf("\n\n\n\npreprocess========================\n"); + // Select only row groups required // Note: `skip_rows` and `num_rows` will be modified in this function. const auto selected_row_groups = @@ -1708,17 +1710,16 @@ std::pair reader::impl::preprocess_file( table_with_metadata reader::impl::read_chunk_internal(bool uses_custom_row_bounds) { - if (!has_next()) { - // return empty - } - - auto const& read_info = _chunk_read_info[_current_read_chunk++]; table_metadata out_metadata; // output cudf columns as determined by the top level schema std::vector> out_columns; out_columns.reserve(_output_columns.size()); + if (!has_next()) { return finalize_output(out_metadata, out_columns); } + + auto const& read_info = _chunk_read_info[_current_read_chunk++]; + // allocate outgoing columns allocate_columns(_file_itm_data.chunks, _file_itm_data.pages_info, @@ -1777,30 +1778,39 @@ table_with_metadata reader::impl::read(size_type skip_rows, bool uses_custom_row_bounds, std::vector> const& row_group_list) { - auto [skip_rows_corrected, num_rows_corrected] = - preprocess_file(skip_rows, num_rows, row_group_list); - - // todo: fix this (empty output may be incorrect) - if (!_file_itm_data.has_data) { return table_with_metadata{}; } - - // - compute column sizes and allocate output buffers. - // important: - // for nested schemas, we have to do some further preprocessing to determine: - // - real column output sizes per level of nesting (in a flat schema, there's only 1 level - // of - // nesting and it's size is the row count) - // - // - for nested schemas, output buffer offset values per-page, per nesting-level for the - // purposes of decoding. - // TODO: make this a parameter. - - // auto const _chunk_read_limit = 0; - preprocess_columns(_file_itm_data.chunks, - _file_itm_data.pages_info, - skip_rows_corrected, - num_rows_corrected, - uses_custom_row_bounds, - _chunk_read_limit); + CUDF_EXPECTS(_chunk_read_limit == 0, "Reading the whole file must not have non-zero byte_limit."); + + if (!_file_preprocessed) { + auto [skip_rows_corrected, num_rows_corrected] = + preprocess_file(skip_rows, num_rows, row_group_list); + + // todo: fix this (empty output may be incorrect) + if (!_file_itm_data.has_data) { return table_with_metadata{}; } + + // - compute column sizes and allocate output buffers. + // important: + // for nested schemas, we have to do some further preprocessing to determine: + // - real column output sizes per level of nesting (in a flat schema, there's only 1 level + // of + // nesting and it's size is the row count) + // + // - for nested schemas, output buffer offset values per-page, per nesting-level for the + // purposes of decoding. + // TODO: make this a parameter. 
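+    // note: _chunk_read_limit is the chunked-reader byte limit; plain read() requires it to
+    // be 0 (see the CUDF_EXPECTS above), i.e. the whole file is decoded in a single pass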
+ + // auto const _chunk_read_limit = 0; + preprocess_columns(_file_itm_data.chunks, + _file_itm_data.pages_info, + skip_rows_corrected, + num_rows_corrected, + uses_custom_row_bounds, + _chunk_read_limit); + + CUDF_EXPECTS(_chunk_read_info.size() == 1, + "Reading the whole file should yield only one chunk."); + + _file_preprocessed = true; + } return read_chunk_internal(uses_custom_row_bounds); } From fcffac8497de13f4567215c33b660fe0f4ec5046 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 19 Oct 2022 10:53:38 -0700 Subject: [PATCH 059/162] Rewrite tests Signed-off-by: Nghia Truong --- cpp/tests/io/parquet_chunked_reader_test.cpp | 191 +++++++++---------- 1 file changed, 91 insertions(+), 100 deletions(-) diff --git a/cpp/tests/io/parquet_chunked_reader_test.cpp b/cpp/tests/io/parquet_chunked_reader_test.cpp index 7052bc1cbc9..aaf26bf44e6 100644 --- a/cpp/tests/io/parquet_chunked_reader_test.cpp +++ b/cpp/tests/io/parquet_chunked_reader_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -38,136 +38,127 @@ #include #include -#include - #include +#include + #include #include +// Global environment for temporary files +auto const temp_env = static_cast( + ::testing::AddGlobalTestEnvironment(new cudf::test::TempDirTestEnvironment)); + +using int32s_col = cudf::test::fixed_width_column_wrapper; +using int64s_col = cudf::test::fixed_width_column_wrapper; +using strings_col = cudf::test::strings_column_wrapper; + struct ParquetChunkedReaderTest : public cudf::test::BaseFixture { }; -#if 0 -TEST_F(ParquetChunkedReaderTest, Test) +TEST_F(ParquetChunkedReaderTest, TestChunkedReadSimpleData) { - std::mt19937 gen(6542); - std::bernoulli_distribution bn(0.7f); - auto values = thrust::make_counting_iterator(0); - - constexpr cudf::size_type num_rows = 40000; - cudf::test::fixed_width_column_wrapper a(values, values + num_rows); - cudf::test::fixed_width_column_wrapper b(values, values + num_rows); - - cudf::table_view t({a, b}); - cudf::io::parquet_writer_options opts = cudf::io::parquet_writer_options::builder( - cudf::io::sink_info{"/tmp/chunked_splits.parquet"}, t); - cudf::io::write_parquet(opts); - - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{"/tmp/chunked_splits.parquet"}); - auto result = cudf::io::read_parquet(in_opts); - printf("\nResult size read all: %d\n\n", result.tbl->num_rows()); -} + auto constexpr num_rows = 40000; + auto const filepath = temp_env->get_temp_filepath("chunked_read_simple.parquet"); -#else -TEST_F(ParquetChunkedReaderTest, TestChunkedRead) -{ - std::mt19937 gen(6542); - std::bernoulli_distribution bn(0.7f); - auto values = thrust::make_counting_iterator(0); - - constexpr cudf::size_type num_rows = 40000; - cudf::test::fixed_width_column_wrapper a(values, values + num_rows); - cudf::test::fixed_width_column_wrapper b(values, values + num_rows); - - auto filepath = std::string{"/tmp/chunked_splits.parquet"}; - cudf::table_view t({a, b}); - cudf::io::parquet_writer_options opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, t); - cudf::io::write_parquet(opts); - - //======================================================================================== - { - cudf::io::parquet_reader_options in_opts = - 
cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(in_opts); - printf("Result size read full: %d\n\n\n\n\n", result.tbl->num_rows()); - } + auto const values = thrust::make_counting_iterator(0); + auto const a = int32s_col(values, values + num_rows); + auto const b = int64s_col(values, values + num_rows); + auto const input = cudf::table_view{{a, b}}; - cudf::io::chunked_parquet_reader_options in_opts = - cudf::io::chunked_parquet_reader_options::builder(cudf::io::source_info{filepath}); - in_opts.set_byte_limit(240000); + auto const write_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, input).build(); + cudf::io::write_parquet(write_opts); - cudf::io::chunked_parquet_reader reader(in_opts); + auto const read_opts = + cudf::io::chunked_parquet_reader_options::builder(cudf::io::source_info{filepath}) + .byte_limit(240000) + .build(); + auto reader = cudf::io::chunked_parquet_reader(read_opts); - int count{0}; - while (reader.has_next()) { - printf("\n\nhas next %d\n\n", count++); + auto num_chunks = 0; + auto result = std::make_unique(); - auto result = reader.read_chunk(); - printf("Result size: %d\n\n\n\n\n", result.tbl->num_rows()); + while (reader.has_next()) { + auto chunk = reader.read_chunk(); + if (num_chunks == 0) { + result = std::move(chunk.tbl); + } else { + result = cudf::concatenate(std::vector{result->view(), chunk.tbl->view()}); + } + ++num_chunks; } + + EXPECT_EQ(num_chunks, 2); + CUDF_TEST_EXPECT_TABLES_EQUAL(input, result->view()); } -TEST_F(ParquetChunkedReaderTest, TestChunkedReadString) +TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithString) { - // values the cudf parquet writer uses - // constexpr size_t default_max_page_size_bytes = 512 * 1024; ///< 512KB per page - // constexpr size_type default_max_page_size_rows = 20000; ///< 20k rows per page - std::mt19937 gen(6542); - std::bernoulli_distribution bn(0.7f); - auto values = thrust::make_counting_iterator(0); - constexpr cudf::size_type num_rows = 60000; + auto constexpr num_rows = 60000; + auto const filepath = temp_env->get_temp_filepath("chunked_read_with_strings.parquet"); + + auto const values = thrust::make_counting_iterator(0); + // ints Page total bytes cumulative bytes // 20000 rows of 4 bytes each = A0 80000 80000 // 20000 rows of 4 bytes each = A1 80000 160000 // 20000 rows of 4 bytes each = A2 80000 240000 - cudf::test::fixed_width_column_wrapper a(values, values + num_rows); + auto const a = int32s_col(values, values + num_rows); + // strings Page total bytes cumulative bytes // 20000 rows of 1 char each (20000 + 80004) = B0 100004 100004 // 20000 rows of 4 chars each (80000 + 80004) = B1 160004 260008 // 20000 rows of 16 chars each (320000 + 80004) = B2 400004 660012 - std::vector strings{"a", "bbbb", "cccccccccccccccc"}; - auto const str_iter = cudf::detail::make_counting_transform_iterator(0, [&](int i) { + auto const strings = std::vector{"a", "bbbb", "cccccccccccccccc"}; + auto const str_iter = cudf::detail::make_counting_transform_iterator(0, [&](int32_t i) { if (i < 20000) { return strings[0]; } if (i < 40000) { return strings[1]; } return strings[2]; }); - cudf::test::strings_column_wrapper b{str_iter, str_iter + num_rows}; - // cumulative sizes + auto const b = strings_col{str_iter, str_iter + num_rows}; + auto const input = cudf::table_view{{a, b}}; + + auto const write_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, input) + .max_page_size_bytes(512 * 1024) 
// 512KB per page + .max_page_size_rows(20000) // 20k rows per page + .build(); + cudf::io::write_parquet(write_opts); + + // Cumulative sizes: // A0 + B0 : 180004 // A1 + B1 : 420008 // A2 + B2 : 900012 - // skip_rows / num_rows - // chunked_read_size of 500000 should give 2 chunks: {0, 40000}, {40000, 20000} - // chunked_read_size of 1000000 should give 1 chunks: {0, 60000}, - auto write_tbl = cudf::table_view{{a, b}}; - auto filepath = std::string{"/tmp/chunked_splits_strings.parquet"}; - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, write_tbl); - cudf::io::write_parquet(out_opts); - //======================================================================================== - - { - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(in_opts); - printf("Result size read full: %d\n\n\n\n\n", result.tbl->num_rows()); - } - - cudf::io::chunked_parquet_reader_options in_opts = - cudf::io::chunked_parquet_reader_options::builder(cudf::io::source_info{filepath}); - in_opts.set_byte_limit(500000); - - cudf::io::chunked_parquet_reader reader(in_opts); - - int count{0}; - while (reader.has_next()) { - printf("\n\nhas next %d\n\n", count++); - - auto result = reader.read_chunk(); - printf("Result size: %d\n\n\n\n\n", result.tbl->num_rows()); - } + // skip_rows / num_rows + // byte_limit==500000 should give 2 chunks: {0, 40000}, {40000, 20000} + // byte_limit==1000000 should give 1 chunks: {0, 60000}, + + auto const do_test = [&](auto const byte_limit, auto const expected_num_chunks) { + auto const read_opts = + cudf::io::chunked_parquet_reader_options::builder(cudf::io::source_info{filepath}) + .byte_limit(byte_limit) + .build(); + auto reader = cudf::io::chunked_parquet_reader(read_opts); + + auto num_chunks = 0; + auto result = std::make_unique(); + + while (reader.has_next()) { + auto chunk = reader.read_chunk(); + if (num_chunks == 0) { + result = std::move(chunk.tbl); + } else { + result = + cudf::concatenate(std::vector{result->view(), chunk.tbl->view()}); + } + ++num_chunks; + } + + EXPECT_EQ(num_chunks, expected_num_chunks); + CUDF_TEST_EXPECT_TABLES_EQUAL(input, result->view()); + }; + + do_test(500000, 2); + do_test(1000000, 1); } -#endif From 43dd802cb42ac20a66cbadfd0d52afcd79b844c9 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 19 Oct 2022 11:03:10 -0700 Subject: [PATCH 060/162] Store `decomp_page_data` Signed-off-by: Nghia Truong --- cpp/src/io/parquet/parquet_gpu.hpp | 1 + cpp/src/io/parquet/reader_impl.cu | 5 ++--- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index ac2c0d4b606..ea7bbe72feb 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -249,6 +249,7 @@ struct file_intermediate_data { hostdevice_vector chunks{}; hostdevice_vector pages_info{}; hostdevice_vector page_nesting_info{}; + rmm::device_buffer decomp_page_data; bool has_data{false}; }; diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 6766e918854..0428b0a70a2 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1669,12 +1669,11 @@ std::pair reader::impl::preprocess_file( _file_itm_data.pages_info = hostdevice_vector(total_pages, total_pages, _stream); if (total_pages > 0) { - rmm::device_buffer decomp_page_data; - // decoding of 
column/page information decode_page_headers(_file_itm_data.chunks, _file_itm_data.pages_info); if (total_decompressed_size > 0) { - decomp_page_data = decompress_page_data(_file_itm_data.chunks, _file_itm_data.pages_info); + _file_itm_data.decomp_page_data = + decompress_page_data(_file_itm_data.chunks, _file_itm_data.pages_info); // Free compressed data for (size_t c = 0; c < _file_itm_data.chunks.size(); c++) { if (_file_itm_data.chunks[c].codec != parquet::Compression::UNCOMPRESSED) { From eeec023e892be7077057fab672b702a49f517b54 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 19 Oct 2022 11:13:41 -0700 Subject: [PATCH 061/162] Rewrite tests Signed-off-by: Nghia Truong --- cpp/tests/io/parquet_chunked_reader_test.cpp | 85 +++++++++----------- 1 file changed, 38 insertions(+), 47 deletions(-) diff --git a/cpp/tests/io/parquet_chunked_reader_test.cpp b/cpp/tests/io/parquet_chunked_reader_test.cpp index aaf26bf44e6..3336098f9e8 100644 --- a/cpp/tests/io/parquet_chunked_reader_test.cpp +++ b/cpp/tests/io/parquet_chunked_reader_test.cpp @@ -45,6 +45,7 @@ #include #include +namespace { // Global environment for temporary files auto const temp_env = static_cast( ::testing::AddGlobalTestEnvironment(new cudf::test::TempDirTestEnvironment)); @@ -53,26 +54,11 @@ using int32s_col = cudf::test::fixed_width_column_wrapper; using int64s_col = cudf::test::fixed_width_column_wrapper; using strings_col = cudf::test::strings_column_wrapper; -struct ParquetChunkedReaderTest : public cudf::test::BaseFixture { -}; - -TEST_F(ParquetChunkedReaderTest, TestChunkedReadSimpleData) +auto run_test(std::string const& filepath, std::size_t byte_limit) { - auto constexpr num_rows = 40000; - auto const filepath = temp_env->get_temp_filepath("chunked_read_simple.parquet"); - - auto const values = thrust::make_counting_iterator(0); - auto const a = int32s_col(values, values + num_rows); - auto const b = int64s_col(values, values + num_rows); - auto const input = cudf::table_view{{a, b}}; - - auto const write_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, input).build(); - cudf::io::write_parquet(write_opts); - auto const read_opts = cudf::io::chunked_parquet_reader_options::builder(cudf::io::source_info{filepath}) - .byte_limit(240000) + .byte_limit(byte_limit) .build(); auto reader = cudf::io::chunked_parquet_reader(read_opts); @@ -89,16 +75,39 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadSimpleData) ++num_chunks; } + return std::pair(std::move(result), num_chunks); +} + +} // namespace + +struct ParquetChunkedReaderTest : public cudf::test::BaseFixture { +}; + +TEST_F(ParquetChunkedReaderTest, TestChunkedReadSimpleData) +{ + auto constexpr num_rows = 40'000; + auto const filepath = temp_env->get_temp_filepath("chunked_read_simple.parquet"); + + auto const values = thrust::make_counting_iterator(0); + auto const a = int32s_col(values, values + num_rows); + auto const b = int64s_col(values, values + num_rows); + auto const input = cudf::table_view{{a, b}}; + + auto const write_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, input).build(); + cudf::io::write_parquet(write_opts); + + auto constexpr byte_limit = 240'000; + auto const [result, num_chunks] = run_test(filepath, byte_limit); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(input, result->view()); } TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithString) { - auto constexpr num_rows = 60000; + auto constexpr num_rows = 60'000; auto const filepath = 
temp_env->get_temp_filepath("chunked_read_with_strings.parquet"); - - auto const values = thrust::make_counting_iterator(0); + auto const values = thrust::make_counting_iterator(0); // ints Page total bytes cumulative bytes // 20000 rows of 4 bytes each = A0 80000 80000 @@ -133,32 +142,14 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithString) // skip_rows / num_rows // byte_limit==500000 should give 2 chunks: {0, 40000}, {40000, 20000} // byte_limit==1000000 should give 1 chunks: {0, 60000}, - - auto const do_test = [&](auto const byte_limit, auto const expected_num_chunks) { - auto const read_opts = - cudf::io::chunked_parquet_reader_options::builder(cudf::io::source_info{filepath}) - .byte_limit(byte_limit) - .build(); - auto reader = cudf::io::chunked_parquet_reader(read_opts); - - auto num_chunks = 0; - auto result = std::make_unique(); - - while (reader.has_next()) { - auto chunk = reader.read_chunk(); - if (num_chunks == 0) { - result = std::move(chunk.tbl); - } else { - result = - cudf::concatenate(std::vector{result->view(), chunk.tbl->view()}); - } - ++num_chunks; - } - - EXPECT_EQ(num_chunks, expected_num_chunks); + { + auto const [result, num_chunks] = run_test(filepath, 500'000); + EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(input, result->view()); - }; - - do_test(500000, 2); - do_test(1000000, 1); + } + { + auto const [result, num_chunks] = run_test(filepath, 1'000'000); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(input, result->view()); + } } From 14dfd3f573b12d373bd9adbc6b05c9aa0b9c0528 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 19 Oct 2022 11:30:01 -0700 Subject: [PATCH 062/162] Simple test Signed-off-by: Nghia Truong --- cpp/tests/io/parquet_chunked_reader_test.cpp | 30 ++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/cpp/tests/io/parquet_chunked_reader_test.cpp b/cpp/tests/io/parquet_chunked_reader_test.cpp index 3336098f9e8..5cc0fa4d819 100644 --- a/cpp/tests/io/parquet_chunked_reader_test.cpp +++ b/cpp/tests/io/parquet_chunked_reader_test.cpp @@ -83,6 +83,31 @@ auto run_test(std::string const& filepath, std::size_t byte_limit) struct ParquetChunkedReaderTest : public cudf::test::BaseFixture { }; +TEST_F(ParquetChunkedReaderTest, TestChunkedReadSimpleData) +{ + auto constexpr num_rows = 400; + auto const filepath = temp_env->get_temp_filepath("chunked_read_simple.parquet"); + + auto const values = thrust::make_counting_iterator(0); + auto const a = int32s_col(values, values + num_rows); + auto const b = int64s_col(values, values + num_rows); + auto const input = cudf::table_view{{a, b}}; + + auto const write_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, input).build(); + cudf::io::write_parquet(write_opts); + + auto constexpr byte_limit = 0; + auto const [result, num_chunks] = run_test(filepath, byte_limit); + EXPECT_EQ(num_chunks, 1); + + cudf::test::print(a); + cudf::test::print(result->get_column(0).view()); + + CUDF_TEST_EXPECT_TABLES_EQUAL(input, result->view()); +} + +#if 0 TEST_F(ParquetChunkedReaderTest, TestChunkedReadSimpleData) { auto constexpr num_rows = 40'000; @@ -100,6 +125,9 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadSimpleData) auto constexpr byte_limit = 240'000; auto const [result, num_chunks] = run_test(filepath, byte_limit); EXPECT_EQ(num_chunks, 2); + + cudf::test::print(result->get_column(0).view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(input, result->view()); } @@ -153,3 +181,5 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithString) 
CUDF_TEST_EXPECT_TABLES_EQUAL(input, result->view()); } } + +#endif From 66e9f0947f03b944a1bdb8892367c2e3254d6c55 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 19 Oct 2022 11:45:38 -0700 Subject: [PATCH 063/162] Store `raw_page_data` Signed-off-by: Nghia Truong --- cpp/src/io/parquet/parquet_gpu.hpp | 4 +++- cpp/src/io/parquet/reader_impl.cu | 7 ++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index ea7bbe72feb..6a67f510dea 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -24,6 +24,7 @@ #include "io/utilities/hostdevice_vector.hpp" #include +#include #include #include #include @@ -246,10 +247,11 @@ struct ColumnChunkDesc { // TODO: rename? struct file_intermediate_data { + std::vector> raw_page_data; + rmm::device_buffer decomp_page_data; hostdevice_vector chunks{}; hostdevice_vector pages_info{}; hostdevice_vector page_nesting_info{}; - rmm::device_buffer decomp_page_data; bool has_data{false}; }; diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 0428b0a70a2..d12bde290be 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1586,7 +1586,7 @@ std::pair reader::impl::preprocess_file( std::vector chunk_source_map(num_chunks); // Tracker for eventually deallocating compressed and uncompressed data - std::vector> page_data(num_chunks); + _file_itm_data.raw_page_data = std::vector>(num_chunks); // Keep track of column chunk file offsets std::vector column_chunk_offsets(num_chunks); @@ -1650,7 +1650,7 @@ std::pair reader::impl::preprocess_file( } } // Read compressed chunk data to device memory - read_rowgroup_tasks.push_back(read_column_chunks(page_data, + read_rowgroup_tasks.push_back(read_column_chunks(_file_itm_data.raw_page_data, _file_itm_data.chunks, io_chunk_idx, _file_itm_data.chunks.size(), @@ -1677,7 +1677,8 @@ std::pair reader::impl::preprocess_file( // Free compressed data for (size_t c = 0; c < _file_itm_data.chunks.size(); c++) { if (_file_itm_data.chunks[c].codec != parquet::Compression::UNCOMPRESSED) { - page_data[c].reset(); + _file_itm_data.raw_page_data[c].reset(); + // TODO: Check if this is called } } } From 669b8cf96cc5d0856926e4c384ea7fee7978f364 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 19 Oct 2022 11:49:05 -0700 Subject: [PATCH 064/162] Cleanup test Signed-off-by: Nghia Truong --- cpp/tests/io/parquet_chunked_reader_test.cpp | 29 -------------------- 1 file changed, 29 deletions(-) diff --git a/cpp/tests/io/parquet_chunked_reader_test.cpp b/cpp/tests/io/parquet_chunked_reader_test.cpp index 5cc0fa4d819..3718f1075a0 100644 --- a/cpp/tests/io/parquet_chunked_reader_test.cpp +++ b/cpp/tests/io/parquet_chunked_reader_test.cpp @@ -83,31 +83,6 @@ auto run_test(std::string const& filepath, std::size_t byte_limit) struct ParquetChunkedReaderTest : public cudf::test::BaseFixture { }; -TEST_F(ParquetChunkedReaderTest, TestChunkedReadSimpleData) -{ - auto constexpr num_rows = 400; - auto const filepath = temp_env->get_temp_filepath("chunked_read_simple.parquet"); - - auto const values = thrust::make_counting_iterator(0); - auto const a = int32s_col(values, values + num_rows); - auto const b = int64s_col(values, values + num_rows); - auto const input = cudf::table_view{{a, b}}; - - auto const write_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, input).build(); - cudf::io::write_parquet(write_opts); - - auto constexpr 
byte_limit = 0; - auto const [result, num_chunks] = run_test(filepath, byte_limit); - EXPECT_EQ(num_chunks, 1); - - cudf::test::print(a); - cudf::test::print(result->get_column(0).view()); - - CUDF_TEST_EXPECT_TABLES_EQUAL(input, result->view()); -} - -#if 0 TEST_F(ParquetChunkedReaderTest, TestChunkedReadSimpleData) { auto constexpr num_rows = 40'000; @@ -126,8 +101,6 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadSimpleData) auto const [result, num_chunks] = run_test(filepath, byte_limit); EXPECT_EQ(num_chunks, 2); - cudf::test::print(result->get_column(0).view()); - CUDF_TEST_EXPECT_TABLES_EQUAL(input, result->view()); } @@ -181,5 +154,3 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithString) CUDF_TEST_EXPECT_TABLES_EQUAL(input, result->view()); } } - -#endif From 001c6c71d425c6321339535f31e4a6c3bba6dc4c Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 19 Oct 2022 12:20:28 -0700 Subject: [PATCH 065/162] Fix empty output Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.cu | 44 +++++++++++++++---------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index d12bde290be..b0fa89552fb 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1784,31 +1784,29 @@ table_with_metadata reader::impl::read(size_type skip_rows, auto [skip_rows_corrected, num_rows_corrected] = preprocess_file(skip_rows, num_rows, row_group_list); - // todo: fix this (empty output may be incorrect) - if (!_file_itm_data.has_data) { return table_with_metadata{}; } - - // - compute column sizes and allocate output buffers. - // important: - // for nested schemas, we have to do some further preprocessing to determine: - // - real column output sizes per level of nesting (in a flat schema, there's only 1 level - // of - // nesting and it's size is the row count) - // - // - for nested schemas, output buffer offset values per-page, per nesting-level for the - // purposes of decoding. - // TODO: make this a parameter. - - // auto const _chunk_read_limit = 0; - preprocess_columns(_file_itm_data.chunks, - _file_itm_data.pages_info, - skip_rows_corrected, - num_rows_corrected, - uses_custom_row_bounds, - _chunk_read_limit); + if (_file_itm_data.has_data) { + // - compute column sizes and allocate output buffers. + // important: + // for nested schemas, we have to do some further preprocessing to determine: + // - real column output sizes per level of nesting (in a flat schema, there's only 1 level + // of + // nesting and it's size is the row count) + // + // - for nested schemas, output buffer offset values per-page, per nesting-level for the + // purposes of decoding. + // TODO: make this a parameter. 
- CUDF_EXPECTS(_chunk_read_info.size() == 1, - "Reading the whole file should yield only one chunk."); + // auto const _chunk_read_limit = 0; + preprocess_columns(_file_itm_data.chunks, + _file_itm_data.pages_info, + skip_rows_corrected, + num_rows_corrected, + uses_custom_row_bounds, + _chunk_read_limit); + CUDF_EXPECTS(_chunk_read_info.size() == 1, + "Reading the whole file should yield only one chunk."); + } _file_preprocessed = true; } From f50603ad508c442a33b2d58eda137ef85c14601d Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 19 Oct 2022 13:24:52 -0700 Subject: [PATCH 066/162] Add `preprocess_file_and_columns` Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.cu | 126 ++++++++++------------------- cpp/src/io/parquet/reader_impl.hpp | 7 ++ 2 files changed, 50 insertions(+), 83 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index b0fa89552fb..ccce66943a0 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1556,6 +1556,46 @@ reader::impl::impl(std::vector>&& sources, } } +void reader::impl::preprocess_file_and_columns( + size_type skip_rows, + size_type num_rows, + bool uses_custom_row_bounds, + std::vector> const& row_group_list) +{ + if (_file_preprocessed) { return; } + + auto [skip_rows_corrected, num_rows_corrected] = + preprocess_file(skip_rows, num_rows, row_group_list); + + if (_file_itm_data.has_data) { + // - compute column sizes and allocate output buffers. + // important: + // for nested schemas, we have to do some further preprocessing to determine: + // - real column output sizes per level of nesting (in a flat schema, there's only 1 level + // of + // nesting and it's size is the row count) + // + // - for nested schemas, output buffer offset values per-page, per nesting-level for the + // purposes of decoding. + // TODO: make this a parameter. + + // auto const _chunk_read_limit = 0; + preprocess_columns(_file_itm_data.chunks, + _file_itm_data.pages_info, + skip_rows_corrected, + num_rows_corrected, + uses_custom_row_bounds, + _chunk_read_limit); + + if (_chunk_read_limit == 0) { + CUDF_EXPECTS(_chunk_read_info.size() == 1, + "Reading the whole file should yield only one chunk."); + } + } + + _file_preprocessed = true; +} + std::pair reader::impl::preprocess_file( size_type skip_rows, size_type num_rows, @@ -1780,36 +1820,7 @@ table_with_metadata reader::impl::read(size_type skip_rows, { CUDF_EXPECTS(_chunk_read_limit == 0, "Reading the whole file must not have non-zero byte_limit."); - if (!_file_preprocessed) { - auto [skip_rows_corrected, num_rows_corrected] = - preprocess_file(skip_rows, num_rows, row_group_list); - - if (_file_itm_data.has_data) { - // - compute column sizes and allocate output buffers. - // important: - // for nested schemas, we have to do some further preprocessing to determine: - // - real column output sizes per level of nesting (in a flat schema, there's only 1 level - // of - // nesting and it's size is the row count) - // - // - for nested schemas, output buffer offset values per-page, per nesting-level for the - // purposes of decoding. - // TODO: make this a parameter. 
- - // auto const _chunk_read_limit = 0; - preprocess_columns(_file_itm_data.chunks, - _file_itm_data.pages_info, - skip_rows_corrected, - num_rows_corrected, - uses_custom_row_bounds, - _chunk_read_limit); - - CUDF_EXPECTS(_chunk_read_info.size() == 1, - "Reading the whole file should yield only one chunk."); - } - _file_preprocessed = true; - } - + preprocess_file_and_columns(skip_rows, num_rows, uses_custom_row_bounds, row_group_list); return read_chunk_internal(uses_custom_row_bounds); } @@ -1825,64 +1836,13 @@ table_with_metadata reader::impl::read_chunk() _timestamp_type.id()); } - if (!_file_preprocessed) { - [[maybe_unused]] auto [skip_rows_corrected, num_rows_corrected] = preprocess_file(0, -1, {}); - - // todo: fix this (empty output may be incorrect) - if (_file_itm_data.has_data) { - // - compute column sizes and allocate output buffers. - // important: - // for nested schemas, we have to do some further preprocessing to determine: - // - real column output sizes per level of nesting (in a flat schema, there's only 1 level - // of - // nesting and it's size is the row count) - // - // - for nested schemas, output buffer offset values per-page, per nesting-level for the - // purposes of decoding. - preprocess_columns(_file_itm_data.chunks, - _file_itm_data.pages_info, - skip_rows_corrected, - num_rows_corrected, - true /*uses_custom_row_bounds*/, - _chunk_read_limit); - } - _file_preprocessed = true; - } - + preprocess_file_and_columns(0, -1, true, {}); return read_chunk_internal(true); } bool reader::impl::has_next() { - printf("prepr: %d\n", (int)_file_preprocessed); - - if (!_file_preprocessed) { - [[maybe_unused]] auto [skip_rows_corrected, num_rows_corrected] = preprocess_file(0, -1, {}); - - // todo: fix this (empty output may be incorrect) - if (_file_itm_data.has_data) { - // - compute column sizes and allocate output buffers. - // important: - // for nested schemas, we have to do some further preprocessing to determine: - // - real column output sizes per level of nesting (in a flat schema, there's only 1 level - // of - // nesting and it's size is the row count) - // - // - for nested schemas, output buffer offset values per-page, per nesting-level for the - // purposes of decoding. - // TODO: make this a parameter. 
- - // auto const _chunk_read_limit = 0; - preprocess_columns(_file_itm_data.chunks, - _file_itm_data.pages_info, - skip_rows_corrected, - num_rows_corrected, - true /*uses_custom_row_bounds*/, - _chunk_read_limit); - } - _file_preprocessed = true; - } - + preprocess_file_and_columns(0, -1, true, {}); return _current_read_chunk < _chunk_read_info.size(); } diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index a3b7f7594e4..3185d6e9554 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -100,6 +100,13 @@ class reader::impl { bool has_next(); private: + // TODO + void preprocess_file_and_columns(size_type skip_rows, + size_type num_rows, + bool uses_custom_row_bounds, + const std::vector>& row_group_list); + + // TODO table_with_metadata read_chunk_internal(bool uses_custom_row_bounds); /** From 66976aa9ca5183846b50e95cb7aa9ee291345ba8 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 19 Oct 2022 14:24:39 -0700 Subject: [PATCH 067/162] Misc Signed-off-by: Nghia Truong --- cpp/src/io/parquet/parquet_gpu.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index 6a67f510dea..1781a0685d1 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -257,8 +257,8 @@ struct file_intermediate_data { // TODO: rename? struct chunk_intermediate_data { - rmm::device_uvector page_keys{0, rmm::cuda_stream_default}; - rmm::device_uvector page_index{0, rmm::cuda_stream_default}; + rmm::device_uvector page_keys{0, rmm::cuda_stream_default}; + rmm::device_uvector page_index{0, rmm::cuda_stream_default}; rmm::device_uvector str_dict_index{0, rmm::cuda_stream_default}; }; From 0b0040a6a62bfa710acc96f1d9be3e7e4416d4ea Mon Sep 17 00:00:00 2001 From: Dave Baranec Date: Wed, 19 Oct 2022 16:50:16 -0500 Subject: [PATCH 068/162] Fixed some incorrect logic in preprocess tep. 
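Editor's note (not part of the original commit message): the logic being fixed here is the per-page clamping of output rows against the requested [min_row, min_row + num_rows) window that setupLocalPageInfo performs before the preprocess/decode kernels run. The following is a rough host-side sketch of that clamp for review purposes only; clamp_page_rows, page_bounds and the free-standing parameters are made-up names that mirror the kernel's page_start_row / min_row / num_rows variables, and are not real cudf API.

  #include <algorithm>
  #include <cstddef>
  #include <cstdint>

  struct page_bounds {
    int32_t first_row;  // first row within the page that falls inside the bounds
    int32_t num_rows;   // how many rows of this page fall inside the bounds
  };

  // Hypothetical host-side mirror of the device-side row clamping; illustrative only.
  page_bounds clamp_page_rows(std::size_t page_start_row,
                              std::size_t page_num_rows,
                              std::size_t min_row,
                              std::size_t num_rows)
  {
    page_bounds b{};
    // rows before min_row are skipped
    b.first_row = page_start_row >= min_row
                    ? 0
                    : static_cast<int32_t>(std::min(min_row - page_start_row, page_num_rows));
    // rows at or past min_row + num_rows are dropped
    b.num_rows = static_cast<int32_t>(page_num_rows);
    if (page_start_row + b.first_row + b.num_rows > min_row + num_rows) {
      b.num_rows = static_cast<int32_t>(std::max<int64_t>(
        static_cast<int64_t>(min_row + num_rows) -
          static_cast<int64_t>(page_start_row + b.first_row),
        INT64_C(0)));
    }
    return b;
  }

These bounds only matter on the trim pass; on the first preprocess pass they are deliberately ignored for nested types (see the note added in the following commit).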
--- cpp/src/io/parquet/page_data.cu | 181 +++++------------------- cpp/src/io/parquet/reader_preprocess.cu | 4 +- 2 files changed, 41 insertions(+), 144 deletions(-) diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index c0dc5eeea9e..6c33c13e828 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -848,7 +848,8 @@ static __device__ bool setupLocalPageInfo(page_state_s* const s, PageInfo const* p, device_span chunks, size_t min_row, - size_t num_rows) + size_t num_rows, + int page_idx = 0) { int t = threadIdx.x; int chunk_idx; @@ -944,15 +945,30 @@ static __device__ bool setupLocalPageInfo(page_state_s* const s, // first row within the page to output if (page_start_row >= min_row) { s->first_row = 0; + if(page_idx == 1){ + printf("A: %lu, %lu\n", page_start_row, min_row); + } } else { s->first_row = (int32_t)min(min_row - page_start_row, (size_t)s->page.num_rows); + if(page_idx == 1){ + printf("B: %lu, %lu, %d\n", min_row, page_start_row, s->page.num_rows); + } } // # of rows within the page to output s->num_rows = s->page.num_rows; + if(page_idx == 1){ + printf("X: %lu, %d, %d, %lu, %lu, %d\n", page_start_row, s->first_row, s->num_rows, min_row, num_rows, s->page.num_rows); + } if ((page_start_row + s->first_row) + s->num_rows > min_row + num_rows) { s->num_rows = (int32_t)max((int64_t)(min_row + num_rows - (page_start_row + s->first_row)), INT64_C(0)); + if(page_idx == 1){ + printf("C: %lu, %d, %d, %lu, %lu\n", page_start_row, s->first_row, s->num_rows, min_row, num_rows); + } } + if(page_idx == 1){ + printf("FF: %d, %d, %d\n", s->first_row, s->num_rows, s->page.num_rows); + } // during the decoding step we need to offset the global output buffers // for each level of nesting so that we write to the section this page @@ -1436,33 +1452,28 @@ static __device__ void gpuUpdatePageSizes(page_state_s* s, // computing the full size of the column. on the second pass, we will know our actual row // bounds, so the computation will cap sizes properly. int in_row_bounds = 1; - auto const first_thread_in_range = [&]() { - if (bounds_set) { - // absolute row index - int32_t thread_row_index = - input_row_count + ((__popc(warp_row_count_mask & ((1 << t) - 1)) + is_new_row) - 1); - in_row_bounds = thread_row_index >= s->row_index_lower_bound && - thread_row_index < (s->first_row + s->num_rows) - ? 1 - : 0; - - uint32_t const row_bounds_mask = ballot(in_row_bounds); - int const first_thread_in_range = __ffs(row_bounds_mask) - 1; - - // if we've found the beginning of the first row, mark down the position - // in the def/repetition buffer (skipped_values) and the data buffer (skipped_leaf_values) - if (!t && first_thread_in_range >= 0 && s->page.skipped_values < 0) { - // how many values we've skipped in the rep/def levels - s->page.skipped_values = input_value_count + first_thread_in_range; - // how many values we've skipped in the actual data stream - s->page.skipped_leaf_values = - input_leaf_count + __popc(warp_leaf_count_mask & ((1 << first_thread_in_range) - 1)); - } - - return first_thread_in_range; + if (bounds_set) { + // absolute row index + int32_t thread_row_index = + input_row_count + ((__popc(warp_row_count_mask & ((1 << t) - 1)) + is_new_row) - 1); + in_row_bounds = thread_row_index >= s->row_index_lower_bound && + thread_row_index < (s->first_row + s->num_rows) + ? 
1 + : 0; + + uint32_t const row_bounds_mask = ballot(in_row_bounds); + int const first_thread_in_range = __ffs(row_bounds_mask) - 1; + + // if we've found the beginning of the first row, mark down the position + // in the def/repetition buffer (skipped_values) and the data buffer (skipped_leaf_values) + if (!t && first_thread_in_range >= 0 && s->page.skipped_values < 0) { + // how many values we've skipped in the rep/def levels + s->page.skipped_values = input_value_count + first_thread_in_range; + // how many values we've skipped in the actual data stream + s->page.skipped_leaf_values = + input_leaf_count + __popc(warp_leaf_count_mask & ((1 << first_thread_in_range) - 1)); } - return 0; - }(); + } // increment counts across all nesting depths for (int s_idx = 0; s_idx < max_depth; s_idx++) { @@ -1515,120 +1526,6 @@ static __device__ void gpuUpdatePageSizes(page_state_s* s, } } -#if 0 -/** - * @brief Kernel for computing per-page column size information for all nesting levels. - * - * This function will write out the size field for each level of nesting. - * - * @param pages List of pages - * @param chunks List of column chunks - * @param min_row Row index to start reading at - * @param num_rows Maximum number of rows to read. Pass as INT_MAX to guarantee reading all rows. - * @param trim_pass Whether or not this is the trim pass. We first have to compute - * the full size information of every page before we come through in a second (trim) pass - * to determine what subset of rows in this page we should be reading. - */ -__global__ void __launch_bounds__(block_size) - gpuComputePageSizes(PageInfo* pages, - device_span chunks, - size_t min_row, - size_t num_rows, - bool trim_pass) -{ - __shared__ __align__(16) page_state_s state_g; - - page_state_s* const s = &state_g; - int page_idx = blockIdx.x; - int t = threadIdx.x; - PageInfo* pp = &pages[page_idx]; - - if (!setupLocalPageInfo(s, pp, chunks, trim_pass ? min_row : 0, trim_pass ? num_rows : INT_MAX)) { - return; - } - - // we only need to preprocess hierarchies with repetition in them (ie, hierarchies - // containing lists anywhere within). - bool const has_repetition = chunks[pp->chunk_idx].max_level[level_type::REPETITION] > 0; - bool const is_string_column = (s->col.data_type & 7) == BYTE_ARRAY && s->dtype_len != 4; - - if(!t){ - printf("is_string_column: %d\n", (int)is_string_column); - } - - // if this is a flat hierarchy (no lists) and is not a string column, compute the size directly from the number of values. - if (!has_repetition && !is_string_column) { - if (!t) { - for (size_type idx = 0; idx < pp->num_nesting_levels; idx++) { - pp->nesting[idx].size = pp->num_input_values; - } - } - return; - } - - // zero sizes - int d = 0; - while (d < s->page.num_nesting_levels) { - if (d + t < s->page.num_nesting_levels) { s->page.nesting[d + t].size = 0; } - d += blockDim.x; - } - if (!t) { - s->page.skipped_values = -1; - s->page.skipped_leaf_values = -1; - s->page.str_bytes = 0; - s->input_row_count = 0; - s->input_value_count = 0; - - // if this isn't the trim pass, make sure we visit absolutely everything - if (!trim_pass) { - s->first_row = 0; - s->num_rows = INT_MAX; - s->row_index_lower_bound = -1; - } - } - __syncthreads(); - - // optimization : it might be useful to have a version of gpuDecodeStream that could go wider than - // 1 warp. Currently it only uses 1 warp so that it can overlap work with the value decoding step - // when in the actual value decoding kernel. 
However, during this preprocess step we have no such - // limits - we could go as wide as block_size - if (t < 32) { - constexpr int batch_size = 32; - int target_input_count = batch_size; - while (!s->error && s->input_value_count < s->num_input_values) { - // decode repetition and definition levels. these will attempt to decode at - // least up to the target, but may decode a few more. - if(has_repetition){ - gpuDecodeStream(s->rep, s, target_input_count, t, level_type::REPETITION); - } - gpuDecodeStream(s->def, s, target_input_count, t, level_type::DEFINITION); - __syncwarp(); - - // we may have decoded different amounts from each stream, so only process what we've been - int actual_input_count = has_repetition ? min(s->lvl_count[level_type::REPETITION], - s->lvl_count[level_type::DEFINITION]) - : s->lvl_count[level_type::DEFINITION]; - - // process what we got back - if(is_string_column){ - gpuUpdatePageSizes(s, actual_input_count, t, trim_pass); - } else { - gpuUpdatePageSizes(s, actual_input_count, t, trim_pass); - } - - target_input_count = actual_input_count + batch_size; - __syncwarp(); - } - } - // update # rows in the actual page - if (!t) { - pp->num_rows = s->page.nesting[0].size; - pp->skipped_values = s->page.skipped_values; - pp->skipped_leaf_values = s->page.skipped_leaf_values; - } -} -#endif - /** * @brief Kernel for computing per-page column size information for all nesting levels. * @@ -1656,7 +1553,7 @@ __global__ void __launch_bounds__(block_size) int t = threadIdx.x; PageInfo* pp = &pages[page_idx]; - if (!setupLocalPageInfo(s, pp, chunks, trim_pass ? min_row : 0, trim_pass ? num_rows : INT_MAX)) { + if (!setupLocalPageInfo(s, pp, chunks, trim_pass ? min_row : 0, trim_pass ? num_rows : INT_MAX, page_idx)) { return; } diff --git a/cpp/src/io/parquet/reader_preprocess.cu b/cpp/src/io/parquet/reader_preprocess.cu index dbb1d8114ba..32967b255ac 100644 --- a/cpp/src/io/parquet/reader_preprocess.cu +++ b/cpp/src/io/parquet/reader_preprocess.cu @@ -476,8 +476,8 @@ void reader::impl::preprocess_columns(hostdevice_vector& c auto const will_trim_later = uses_custom_row_bounds || chunked_read_size > 0; gpu::ComputePageSizes(pages, chunks, - !will_trim_later ? min_row : 0, - !will_trim_later ? num_rows : INT_MAX, + will_trim_later ? min_row : 0, + will_trim_later ? num_rows : INT_MAX, !will_trim_later, _stream); From 467de78e8b172fe0b302354a9ebcfff170ae7b4c Mon Sep 17 00:00:00 2001 From: Dave Baranec Date: Wed, 19 Oct 2022 16:55:25 -0500 Subject: [PATCH 069/162] Removed debug stuff. Added some comments. --- cpp/src/io/parquet/page_data.cu | 38 ++++++++++++--------------------- 1 file changed, 14 insertions(+), 24 deletions(-) diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index 6c33c13e828..e8348605405 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -942,33 +942,23 @@ static __device__ bool setupLocalPageInfo(page_state_s* const s, s->dtype_len = 8; // Convert to 64-bit timestamp } - // first row within the page to output - if (page_start_row >= min_row) { - s->first_row = 0; - if(page_idx == 1){ - printf("A: %lu, %lu\n", page_start_row, min_row); + // NOTE: s->page.num_rows, s->col.chunk_row, s->first_row and s->num_rows will be + // invalid/bogus during first pass of the preprocess step for nested types. this is ok + // because we ignore these values in that stage. 
+ { + // first row within the page to output + if (page_start_row >= min_row) { + s->first_row = 0; + } else { + s->first_row = (int32_t)min(min_row - page_start_row, (size_t)s->page.num_rows); } - } else { - s->first_row = (int32_t)min(min_row - page_start_row, (size_t)s->page.num_rows); - if(page_idx == 1){ - printf("B: %lu, %lu, %d\n", min_row, page_start_row, s->page.num_rows); + // # of rows within the page to output + s->num_rows = s->page.num_rows; + if ((page_start_row + s->first_row) + s->num_rows > min_row + num_rows) { + s->num_rows = + (int32_t)max((int64_t)(min_row + num_rows - (page_start_row + s->first_row)), INT64_C(0)); } } - // # of rows within the page to output - s->num_rows = s->page.num_rows; - if(page_idx == 1){ - printf("X: %lu, %d, %d, %lu, %lu, %d\n", page_start_row, s->first_row, s->num_rows, min_row, num_rows, s->page.num_rows); - } - if ((page_start_row + s->first_row) + s->num_rows > min_row + num_rows) { - s->num_rows = - (int32_t)max((int64_t)(min_row + num_rows - (page_start_row + s->first_row)), INT64_C(0)); - if(page_idx == 1){ - printf("C: %lu, %d, %d, %lu, %lu\n", page_start_row, s->first_row, s->num_rows, min_row, num_rows); - } - } - if(page_idx == 1){ - printf("FF: %d, %d, %d\n", s->first_row, s->num_rows, s->page.num_rows); - } // during the decoding step we need to offset the global output buffers // for each level of nesting so that we write to the section this page From 58617478cd08887d6fe19fb126ea554b75ced027 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 19 Oct 2022 21:10:48 -0700 Subject: [PATCH 070/162] Change function Signed-off-by: Nghia Truong --- cpp/tests/io/parquet_chunked_reader_test.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/cpp/tests/io/parquet_chunked_reader_test.cpp b/cpp/tests/io/parquet_chunked_reader_test.cpp index 3718f1075a0..3393a6139b5 100644 --- a/cpp/tests/io/parquet_chunked_reader_test.cpp +++ b/cpp/tests/io/parquet_chunked_reader_test.cpp @@ -54,7 +54,7 @@ using int32s_col = cudf::test::fixed_width_column_wrapper; using int64s_col = cudf::test::fixed_width_column_wrapper; using strings_col = cudf::test::strings_column_wrapper; -auto run_test(std::string const& filepath, std::size_t byte_limit) +auto chunked_read(std::string const& filepath, std::size_t byte_limit) { auto const read_opts = cudf::io::chunked_parquet_reader_options::builder(cudf::io::source_info{filepath}) @@ -97,8 +97,7 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadSimpleData) cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, input).build(); cudf::io::write_parquet(write_opts); - auto constexpr byte_limit = 240'000; - auto const [result, num_chunks] = run_test(filepath, byte_limit); + auto const [result, num_chunks] = chunked_read(filepath, 240'000); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(input, result->view()); @@ -144,12 +143,12 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithString) // byte_limit==500000 should give 2 chunks: {0, 40000}, {40000, 20000} // byte_limit==1000000 should give 1 chunks: {0, 60000}, { - auto const [result, num_chunks] = run_test(filepath, 500'000); + auto const [result, num_chunks] = chunked_read(filepath, 500'000); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(input, result->view()); } { - auto const [result, num_chunks] = run_test(filepath, 1'000'000); + auto const [result, num_chunks] = chunked_read(filepath, 1'000'000); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(input, result->view()); } From 
721c0527d3705483f7ac40043d9882c49e81f9c3 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 20 Oct 2022 09:20:57 -0700 Subject: [PATCH 071/162] Disable debug printing Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.cu | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index ccce66943a0..ef762d97819 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1601,7 +1601,7 @@ std::pair reader::impl::preprocess_file( size_type num_rows, const std::vector>& row_group_list) { - printf("\n\n\n\npreprocess========================\n"); + // printf("\n\n\n\npreprocess========================\n"); // Select only row groups required // Note: `skip_rows` and `num_rows` will be modified in this function. @@ -1768,7 +1768,8 @@ table_with_metadata reader::impl::read_chunk_internal(bool uses_custom_row_bound read_info.num_rows, uses_custom_row_bounds); - printf("read skip_rows = %d, num_rows = %d\n", (int)read_info.skip_rows, (int)read_info.num_rows); + // printf("read skip_rows = %d, num_rows = %d\n", (int)read_info.skip_rows, + // (int)read_info.num_rows); // decoding column data decode_page_data(_file_itm_data.chunks, From dbdfc7400289b21c1df58e7abef73b21a498f599 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 20 Oct 2022 12:43:20 -0700 Subject: [PATCH 072/162] Implement `ParquetChunkedReader` Signed-off-by: Nghia Truong --- .../ai/rapids/cudf/ParquetChunkedReader.java | 104 ++++++++++++++++++ .../test/java/ai/rapids/cudf/TableTest.java | 36 ++++-- java/src/test/resources/splittable.parquet | Bin 0 -> 320341 bytes 3 files changed, 131 insertions(+), 9 deletions(-) create mode 100644 java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java create mode 100644 java/src/test/resources/splittable.parquet diff --git a/java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java b/java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java new file mode 100644 index 00000000000..f65c4123060 --- /dev/null +++ b/java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java @@ -0,0 +1,104 @@ +/* + * + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package ai.rapids.cudf; + +import java.io.File; + +/** + * TODO + */ +public class ParquetChunkedReader implements AutoCloseable { + private long handle; + + + /** + * TODO + */ + ParquetChunkedReader(long chunkSizeByteLimit, File path) { + this(chunkSizeByteLimit, ParquetOptions.DEFAULT, path); + } + + /** + * TODO + */ + ParquetChunkedReader(long chunkSizeByteLimit, ParquetOptions opts, File path) { + handle = create(chunkSizeByteLimit, opts.getIncludeColumnNames(), opts.getReadBinaryAsString(), + path.getAbsolutePath(), 0, 0, opts.timeUnit().typeId.getNativeId()); + } + + /** + * TODO + * @param chunkSizeByteLimit Byte limit (ex: 1MB=1048576) + * @param opts + * @param buffer + * @param offset + * @param len + */ + ParquetChunkedReader(long chunkSizeByteLimit, ParquetOptions opts, HostMemoryBuffer buffer, + long offset, long len) { + handle = create(chunkSizeByteLimit, opts.getIncludeColumnNames(), opts.getReadBinaryAsString(), null, + buffer.getAddress() + offset, len, opts.timeUnit().typeId.getNativeId()); + } + + /** + * TODO + */ + public boolean hasNext() { + return hasNext(handle); + } + + /** + * TODO + */ + public Table readChunk() { + long[] columnPtrs = readChunk(handle); + if (columnPtrs == null) { + return null; + } else { + return new Table(columnPtrs); + } + } + + @Override + public void close() { + if (handle != 0) { + close(handle); + handle = 0; + } + } + + /** + * TODO + * @param chunkSizeByteLimit TODO + * @param filterColumnNames name of the columns to read, or an empty array if we want to read + * @param binaryToString whether to convert this column to String if binary + * @param filePath the path of the file to read, or null if no path should be read. + * @param bufferAddrs the address of the buffer to read from or 0 if we should not. + * @param length the length of the buffer to read from. 
+ * @param timeUnit return type of TimeStamp in units + */ + private static native long create(long chunkSizeByteLimit, String[] filterColumnNames, + boolean[] binaryToString, String filePath, long bufferAddrs, long length, int timeUnit); + + private static native boolean hasNext(long handle); + + private static native long[] readChunk(long handle); + + private static native void close(long handle); +} diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java index f31da054091..db1adb8fa80 100644 --- a/java/src/test/java/ai/rapids/cudf/TableTest.java +++ b/java/src/test/java/ai/rapids/cudf/TableTest.java @@ -75,6 +75,7 @@ public class TableTest extends CudfTestBase { private static final File TEST_PARQUET_FILE = TestUtils.getResourceAsFile("acq.parquet"); + private static final File TEST_PARQUET_FILE_CHUNKED_READ = TestUtils.getResourceAsFile("splittable.parquet"); private static final File TEST_PARQUET_FILE_BINARY = TestUtils.getResourceAsFile("binary.parquet"); private static final File TEST_ORC_FILE = TestUtils.getResourceAsFile("TestOrcFile.orc"); private static final File TEST_ORC_TIMESTAMP_DATE_FILE = TestUtils.getResourceAsFile("timestamp-date-test.orc"); @@ -725,6 +726,23 @@ void testReadParquetContainsDecimalData() { } } + @Test + void testChunkedReadParquet() { + try (ParquetChunkedReader reader = new ParquetChunkedReader(240000, + TEST_PARQUET_FILE_CHUNKED_READ)) { + int numChunks = 0; + long totalRows = 0; + while(reader.hasNext()) { + ++numChunks; + try(Table chunk = reader.readChunk()) { + totalRows += chunk.getRowCount(); + } + } + assertEquals(2, numChunks); + assertEquals(40000, totalRows); + } + } + @Test void testReadAvro() { AvroOptions opts = AvroOptions.builder() @@ -5798,8 +5816,8 @@ private static Scalar getDecimalScalarRangeBounds(int scale, int unscaledValue, case 2: return Scalar.fromDecimal(scale, unscaledValue); case 3: return Scalar.fromDecimal(scale, Long.valueOf(unscaledValue)); case 4: return Scalar.fromDecimal(scale, big(unscaledValue)); - default: - throw new IllegalStateException("Unexpected order by column index: " + default: + throw new IllegalStateException("Unexpected order by column index: " + orderby_col_idx); } } @@ -5809,11 +5827,11 @@ void testRangeWindowsWithDecimalOrderBy() { try (Table unsorted = new Table.TestBuilder() .column(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) // GBY Key .column(1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3) // GBY Key - .decimal32Column(-1, 4000, 3000, 2000, 1000, - 4000, 3000, 2000, 1000, + .decimal32Column(-1, 4000, 3000, 2000, 1000, + 4000, 3000, 2000, 1000, 4000, 3000, 2000, 1000) // Decimal OBY Key - .decimal64Column(-1, 4000l, 3000l, 2000l, 1000l, - 4000l, 3000l, 2000l, 1000l, + .decimal64Column(-1, 4000l, 3000l, 2000l, 1000l, + 4000l, 3000l, 2000l, 1000l, 4000l, 3000l, 2000l, 1000l) // Decimal OBY Key .decimal128Column(-1, RoundingMode.UNNECESSARY, big(4000), big(3000), big(2000), big(1000), @@ -5822,13 +5840,13 @@ void testRangeWindowsWithDecimalOrderBy() { .column(9, 1, 5, 7, 2, 8, 9, 7, 6, 6, 0, 8) // Agg Column .build()) { - // Columns 2,3,4 are decimal order-by columns of type DECIMAL32, DECIMAL64, + // Columns 2,3,4 are decimal order-by columns of type DECIMAL32, DECIMAL64, // and DECIMAL128 respectively, with similarly ordered values. // In the following loop, each decimal type is tested as the order-by column, // producing the same results with similar range bounds. 
for (int decimal_oby_col_idx = 2; decimal_oby_col_idx <= 4; ++decimal_oby_col_idx) { - try (Table sorted = unsorted.orderBy(OrderByArg.asc(0), - OrderByArg.asc(1), + try (Table sorted = unsorted.orderBy(OrderByArg.asc(0), + OrderByArg.asc(1), OrderByArg.asc(decimal_oby_col_idx)); ColumnVector expectSortedAggColumn = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6)) { ColumnVector sortedAggColumn = sorted.getColumn(5); diff --git a/java/src/test/resources/splittable.parquet b/java/src/test/resources/splittable.parquet new file mode 100644 index 0000000000000000000000000000000000000000..0f110ee10007546ce26b40c93da46cf527db10f7 GIT binary patch literal 320341 zcmW*UV+`O7vj))bUE8*8+qP}nwr$(CZQHhOd+*uX^fYPud(tMId8Sg5|8*KAR&oJOcIikjO3&sC8HNAm8eV=s#1;W)SxD{s7)Q}QjhvHpdpQD zOcR>YjOMhUC9P;p8`{#2_H>{lo#;##y3&pA^q?ob=uIE`(vSWOU?77S%n*h$jNy!6 zB%>J37{)S=@l0SMlbFmDrZSD`%wQ(7n9UsKGLQKzU?GcG%o3KejODCgC97D?8rHIo z^=x1xo7l`2wz7@w>|iIm*v%gHvXA{7;2?)M%n^=qjN_c(B&Rsd8P0N!^IYH}m$=Lo zu5yj*+~6j+xXm5za*z8w;31EA%oCpSjOV=IC9inR8{YDc_k7?ZpZLrdzVeOl{NN|Q z_{|^w^1uI_1Rx-R2uu)y5{%%4AS9s(O&G!wj_^bvB9Vwp6rvK1=)@oeQenwWv)U>QayTG@v1kXiO8D(v0S`pe3zn zO&i+Mj`nn*Bc13>7rN4o?)0E1z35FJ`qGd73}7IG7|alcGK}GjU?ig$%^1cqj`2)j zB9oZR6s9tb>C9jzvzW~s<}#1@EMOsvSj-ZZvW(@dU?r)hZbx46w6 z?sAX&Jm4XZc+3-?@{H%a;3cnk%^TkGj`w`vBcJ%p7rye1@BH8=zxd4`{_^j|{~rMe zNFV|egrEc?I3Wm0C_)p4u!JK#5r{}6A`^wEL?b#eh)FDB6Nk9OBR&a8NFoxGgrp=R zIVngTwNFfSSgrXFqI3*}aDN0j@ zvXrAd6{tuhDpQ53RHHgIs7WnqQ-`|LqdpC2NFy54gr+p3IW1^OD_YZrwzQ)?9q33W zI@5)&bfY^x=t(bn(}%wFqdx-}$RGwYgrN*$I3pOzC`L1ev5aFp6PU;(CNqVpOk+AT zn8_?=Gl#j%V?GO5$RZZAgrzKFIV)JnDps?GwX9=38`#JuHnWATY-2k+*vT$-vxmLx zV?PHt$RQ4MgrgkeI43yCDNb{Svz+5R7r4kJE^~#eT;n=7xXCSUbBDX!<30~~$Ri%} zgr_{?IWKt0D_--4x4h#$ANa^8KJ$gIeB(Pm_{lGR^M}9u`{@6V00blufeAuTf)Sh$ zgd`N92}4-I5uOM{BodK{LR6v=ofyO<7O{y#T;dU*1SBL8iAh3Il98Mgq$CxoNkdxF zk)8}>Bomp*LRPYoogCyO7rDtpUhrl%y1;DMMMxQJxA^q!N{> zLRG3!of_1n7PYBEUFuPv1~jA*jcGztn$esVw4@cSX+vAu(Vh-;q!XR#LRY%cogVb0 z7rp62U;5FX0SsgigBik5hB2HGjARs}8N*n{F`fxbWD=8^!c?X)of*tz7PFbdT;?&K z1uSF{i&?@_ma&`_tYj6dS;Jb^v7QZVWD}d&!dAAiogM6C7rWWRUiPt{103WKhdIJg zj&Yn5oa7XzIm21bah?lYUG8z82R!5vk9opVp7ER)yyO+H zdBa=Y@tzNS76<6rwOiC`vJkQ-YF|qBLbFOF7C@fr?b3GF7NbHL6pCn$)5; zb*M`{>eGORG@>z0Xi77h(}I??qBU)3OFP=rfsS;dGhOIPH@eeEMhTBSjsY% zvx1eZVl``6%R1JxfsJfpGh5ioHny{ao$O*ad)Ui9_H%%P9O5uXILa}ObApqc;xuPC z%Q?<-fs0(?GFQ0DHLi1mo800yceu+v?(=|$JmN7=c*--L^MaSW;x%u0%RAolfscIR zGhg`1H@@?OpZwxCfB4J)0;_)l5RgCwCI~?ZMsPw9l2C*u3}FdJcp?yyNJJ(IQHe%$ zVi1#9#3l}LiAQ`AkdQ@0trU*qTMsZ3|l2VkW3}q=tc`8tmN>ru_RjEdGYEY9})TRz~sYiVp(2zzn zrU^}HMsr%wl2){)4Q**hdpgjOPIRUVUFk-5deDAZhTiM2TcCeFO>}C&p*~fkkaF9bB<_JeQ#&J$?l2e@K3}-pVc`k5~OI+p( zSGmS@Zg7)Z+~y8+=(3WeG#AU83dBtnq@RoPH=K~-4#Am+nm2Z6K2S546Z~pL?{{>b5 z1Rx-R2uu)y5{%%4AS9s(O&G!wj_^bvB9Vwp6rvK1=)@oeQenwWv)U>QayTG@v1kXiO8D(v0S`pe3znO&i+Mj`nn* zBc13>7rN4o?)0E1z35FJ`qGd73}7IG7|alcGK}GjU?ig$%^1cqj`2)jB9oZR6s9tb z>C9jzvzW~s<}#1@EMOsvSj-ZZvW(@dU?r)hZbx46w6?sAX&Jm4XZ zc+3-?@{H%a;3cnk%^TkGj`w`vBcJ%p7rye1@BH8=zxd4`{_^h!|9=D^Ab|)>5P}kn z;DjI~p$JVF!V-?~L?9xOh)fis5{>A@ASSVhO&sD9kN6}YA&E##5|WaP>6Q1&n=e*!0 zuXxQH-tvz3eBdLW_{25Ry=YCJbQ- zM|dI-kw`=)3Q>thbYc*bSi~j{afwHK5|EHYBqj+-Nk(!~kdjoSCJkvxM|v`lkxXPJ z3t7oVc5;xDT;wJXdC5n93Q&+j6s8D8DMoQhP?A!VrVM2%M|mnxkxEpi3RS5_b!t$P zTGXZvb*V>v8qknNG^PnnX-0Ee(2`cPrVVXrM|(QZkxq1`3tj0(cY4s1Ui799ed$Mk z1~8C83}y&J8OCr%Fp^P>W(;E)$9N_%kx5Ku3R9WJbY?J-EM^Hy zS;lf!u##1*W({ju$9gufkxgu73tQR7c6P9nUF>ELd)dc+4seh|9Oei|ImU5LaFSD; z<_u>!$9XPrkxN|W3Rk(tb#8EzTioUjce%%X9`KMyJmv{cdB$^I@RC=&<_&Lo$9q2T zkxzW)3t#!hcYg4bU;O3|fBDzg_a6ZWNFV|egrEc?I3Wm0C_)p4u!JK#5r{}6A`^wE 
zL?b#eh)FDB6Nk9OBR&a8NFoxGgrp=RIVngTwNFfSSgrXFqI3*}aDN0j@vXrAd6{tuhDpQ53RHHgIs7WnqQ-`|LqdpC2 zNFy54gr+p3IW1^OD_YZrwzQ)?9q33WI@5)&bfY^x=t(bn(}%wFqdx-}$RGwYgrN*$ zI3pOzC`L1ev5aFp6PU;(CNqVpOk+ATn8_?=Gl#j%V?GO5$RZZAgrzKFIV)JnDps?G zwX9=38`#JuHnWATY-2k+*vT$-vxmLxV?PHt$RQ4MgrgkeI43yCDNb{Svz+5R7r4kJ zE^~#eT;n=7xXCSUbBDX!<30~~$Ri%}gr_{?IWKt0D_--4x4h#$ANa^8KJ$gIeB(Pm z_{lGR^M}9uYwG=v00blufeAuTf)Sh$gd`N92}4-I5uOM{BodK{LR6v=ofyO<7O{y# zT;dU*1SBL8iAh3Il98Mgq$CxoNkdxFk)8}>Bomp*LRPYoogCyO7rDtpUhrl%y1;DMMMxQJxA^q!N{>LRG3!of_1n7PYBEUFuPv1~jA*jcGztn$esV zw4@cSX+vAu(Vh-;q!XR#LRY%cogVb07rp62U;5FX0SsgigBik5hB2HGjARs}8N*n{ zF`fxbWD=8^!c?X)of*tz7PFbdT;?&K1uSF{i&?@_ma&`_tYj6dS;Jb^v7QZVWD}d& z!dAAiogM6C7rWWRUiPt{103WKhdIJgj&Yn5oa7XzIm21bah?lYUG8z82R!5vk9opVp7ER)yyO+HdBa=Y@tzNSDP6JlYEp~Z)S)i*s80hL(ul@1p()L1P77Mniq^EDE$wJe z2RhP;&UB$G-RMpadeV#D^r0{P=+6KKGKj$pVJO2G&Im>_iqVW=EaMo@1ST?x$xLA? z)0oZ-W-^P}%waC`n9l+hvWUejVJXX4&I(qtiq))PE$dj%1~#&Z&1_*S+t|(ycCw4z z>|rna*v|nDa)`qm;V8#A&IwL(iqo9oEay1S1uk-l%Ut0q*SO9NZgPv;+~F?wxX%L~ z@`%Sg;VI8}&I?}hir2j1E$?{G2R`zN&wSx4-}ufCe)5ao{NXSEhI#%Y009X^V1f{o zU<4-wAqhoj!Vs2lgeL+Ki9}?g5S3^|Ck8QzMQq{_mw3b{0SQS&Vv>-QWF#jADM>|Y z(vX&Pq$dLz$wX$dkd00k*TVTw?cViczYB`HN|%21Yal&1m} zsYGR}P?c&_rv^2tMQ!R(mwMEv0S#$HW17&EW;CY-Eont-+R&DEw5J0d=|pF`(3Ng< zrw2XhMQ{4hmwxnT00SAsV1_W1VGL&kBN@eL#xRy~jAsH9nZ#tKFqLUcX9hEw#cbv< zmwC)*0Sj5gVwSL!Wh`d}D_O;A*07d!tY-ro*~DhHu$66WX9qjk#cuYnmwoK#00%k5 zVUBQ=V;tuMCppDw&Ty7q#cl3zmwVjj0S|e^W1jGoXFTTx zFL}jl-td-pyypWS`NU_w@Re_T=LbLe#c%%bmw$tN{}F(I1R^j&2ud)56M~S0A~azL zOE|(4frvyRGEs<1G@=uOn8YGBafnMi;*)@cBqA|MNJ=u2lY*3_A~k79OFGh%fsAA# zGg-(=HnNk0oa7=mdB{sX@>76<6rwOiC`vJkQ-YF|qBLbFOF7C@fr?b3GF7NbHL6pC zn$)5;b*M`{>eGORG@>z0Xi77h(}I??qBU)3OFP=rfsS;dGhOIPH@eeEMhTB zSjsY%vx1eZVl``6%R1JxfsJfpGh5ioHny{ao$O*ad)Ui9_H%%P9O5uXILa}ObApqc z;xuPC%Q?<-fs0(?GFQ0DHLi1mo800yceu+v?(=|$JmN7=c*--L^MaSW;x%u0%RAol zfscIRGhg`1H@@?OpZwxCfB4J4A>RK8KtKW!m>>it7{LiaNJ0^sFoY!>;fX**A`zJ= zL?s&0i9t+a5t}%~B_8ofKtd9cm?R`68OcdON>Y)UG^8aR>B&GwGLe}qWF;Hf$w5wX zk()f^B_H`IKtT#om?9LV7{w_;NlH=yOIp#IHngQ3?dd>AI?r62tnz(58um>~>h z7{eLCNJcT5F^pv#;I& zHLPVF>)F6YHnEv4Y-JnU*}+bBv70^YWgq)Fz(Edim?IqJ7{@umNltN^Go0ld=efW| zE^(PFT;&?qxxr0tahp5bPfe1_xf)b42gdilL2u&Em5{~dhAR>{7OcbILjp)Q6Cb5W3 z9O4p>_#_}9iAYQml9G(%q#z}!NKG2jl8*FbAS0Q`Oct_|jqKzgC%MQ?9`cfp{1l)d zg(yrBic*Z?l%OP~C`}p4QjYRepdyv1Ockn9jq22(Cbg(d9qLk#`ZS;+jc800n$nEs zw4f!eXiXd1(vJ3Ypd+2=Oc%P+jqdcIC%x!RANtad{tRFsgBZ*ZhBA!dj9?_A7|j^Q zGLG>~U?P*4%oL_Fjp@u_CbO8$9Og2Q`7B@|i&)GOma>fHtY9UpSj`&NvX1p^U?ZE@ z%oet?jqU7UC%f3q9`>@2{T$#Rhd9g;j&h9SoZuv;3J>-%oo1$jqm*6C%^d3AO7+$ zyyHIt5RgCwCI~?ZMsPw9l2C*u3}FdJcp?yyNJJ(IQHe%$Vi1#9#3l}LiAQ`AkdQ@0trU*qTMsZ3| zl2VkW3}q=tc`8tmN>ru_RjEdGYEY9})TRz~sYiVp(2zznrU^}HMsr%wl2){)4Q**h zdpgjOPIRUVUFk-5deDAZhTiM2TcCeFO z>}C&p*~fkkaF9bB<_JeQ#&J$?l2e@K3}-pVc`k5~OI+p(SGmS@Zg7)Z+~y8hfil%qTq zs7NI$Q-!KjqdGOHNiAwqhq~0GJ`HF{BO23$rZl5DEoezATGNKMw4*&8=tw6z(}k{d zqdPt5NiTZShraZqKLZ%ZAO&aK$t-3w zhq=sSJ_}gLA{MiRr7UAPD_F@YR>(8$u4%YhrR4$KLP^DMC?-QJfN#q!gtoLs`mEo(fc?5|yb!RjN^) z8q}l~wW&j0>QSEtG^7!YX+l$)(VP~vq!q1cLtEO>o(^=R6P@WoSGv)i9`vLaz3D?= z`q7^O3}g_48NyJ8F`N;MWE7(r!&t^Ko(W835|f$2RHiYV8O&rBvzfzO<}sfIEMyUj zS;A75v78mGWEHDf!&=s{o(*hd6Pww>R<^O79qeQmyV=8D_OYJ>9OMv(Il@tnahwyJ zh{PlzDalAq3R04a)TALT=}1ooGLnhRWFafr$W9J& zl8fBrAusvJPXP*2h{6=1D8(pF2})9m(v+brs7?)PQj6Nup)U2P zPXij#h{iObDa~k33tG~O*0iB5?PyO2I?{>GbfGKV=uQuM(u>~op)dXD&j1E8h`|hD zD8m@e2u3oB(Trg%;~38bCNhc1Okpb1n9dAlGK<;FVJ`ES&jJ>*h{Y^nDa%;S3Rbd; z)vRGH>sZeQHnNG$Y+)*>T;VF$xXul3a*NyC;V$>M&jTLvh{rtPDbIM$3tsYy*Sz5^?|9D#KJtmreBmqK 
z_|6Z0@{8a6;V=KfI{!l;=>Gx|h`h{PlzDalAq3R04a)TALT>5y&uzYJs~6Pd|ERP^DMC?-QJfN#q!gtoLs`mEo(fc?5|yb!RjN^)8q}l~wW&j0>QSEtG^7!Y zX+l$)(VP~vq!q1cLtEO>o(^=R6P@WoSGv)i9`vLaz3D?=`q7^O3}g_48NyJ8F`N;M zWE7(r!&t^Ko(W835|f$2RHiYV8O&rBvzfzO<}sfIEMyUjS;A75v78mGWEHDf!&=s{ zo(*hd6Pww>R<^O79qeQmyV=8D_OYJ>9OMv(Il@tnahwyJh{PlzDalAq3R04a)TALT=}1ooGLnhRWFafr$W9J&l8fBrAusvJPXP*2h{6=1 zD8(pF2})9m(v+brs7?)PQj6Nup)U2PPXij#h{iObDa~k33tG~O z*0iB5?PyO2I?{>GbfGKV=uQuM(u>~op)dXD&j1E8h`|hDD8m@e2u3oB(Trg%;~38b zCNhc1Okpb1n9dAlGK<;FVJ`ES&jJ>*h{Y^nDa%;S3Rbd;)vRGH>sZeQHnNG$Y+)*>T;VF$xXul3a*NyC z;V$>M&jTLvh{rtPDbIM$3tsYy*Sz5^?|9D#KJtmreBmqK_|6Z0@{8a6;V=I}y8j~p z0SQE4f)JEo1SbR`2}Nka5SDO+Cjt?PL}a26m1smK1~G|6Y~m1?c*G|G2}wj^l8}^S zBqs$aNkwYXkd}0$Cj%MDL}s#(m26}u2RX?_Zt{?qeB`G91t~;ficpkd6sH6wDMe|@ zP?mC(rveqJL}jW_m1+=(3WeG#AU83dBtnq@RoPH=K~-4#Am+nm2Z6K2S546Z~pL?e*yjf5rBXMA}~P+N-%;G zf{=tFG+_u!IKmTwh(sbXQHV-3q7#Fd#3D9vh)X=;lYoRIA~8uwN-~m@f|R5pHEBpo zI?|JYjASA+S;$H@vXg_HI4f|8V?G-W7DIm%Okid3R9 zRj5ies#AlS)S@Q6 z^rAO?=u1EPGk}2%VlYD($}omAf{~13G-DXcIL0%9iA-WLQ<%y$rZa|!^2*vmflbAW>!;xI=z z$}x^}f|H!$G-o)=InHx|i(KL|SGdYGu5*K%+~PKOxXV56^MHpu;xSKn$}^txf|tDF zHE(#!JKpnwk9^`YU--&5zVm~h{Ngu%_{+cL9RCr3fCM5iK?q7Pf)j#}gd#Ly2unD^ z6M=|CA~I2kN;IMqgP6o3HgSkcJmQmpgd`#{Nk~dEl9Pgzq#`wGNJ~1>lYxw6A~RXY zN;a~SgPi0dH+jfQKJrt5f)t`KMJP%!ic^A;l%h0cC`&oYQ-O+9qB2#eN;RregPPQ$ zHg%{=J?hhdhBTrvO=wCpn$v=ow4ya_XiGcV(}9k3qBC9SN;kUGgP!!FH+|?!Kl(F( zfed0WLm0|1hBJbZjAArn7|S@uGl7XrVlq>h$~2}kgPF`?HglNEJm#~2g)Cw*OIXS> zma~GDtYS55Sj#%rvw@9lVl!LV$~LyMgPrVRH+$I2KK65fgB;>8M>xtcj&p*OoZ>WR zILkTCbAgLo;xbpb$~CTYgPYvqHg~woJ?`^>hdkmjPk72Rp7Vm2yy7))c*{H9^MQ|i z;xk|P$~V6AgP;83H-Grczr{TN5rBXMA}~P+N-%;Gf{=tFG+_u!IKmTwh(sbXQHV-3 zq7#Fd#3D9vh)X=;lYoRIA~8uwN-~m@f|R5pHEBpoI?|JYjASA+S;$H@vXg_HI4f|8V?G-W7DIm%Okid3R9Rj5ies#AlS)S@Q6^rAO?=u1EPGk}2%VlYD($}omA zf{~13G-DXcIL0%9iA-WLQ<%y$rZa|!^2*vmflbAW>!;xI=z$}x^}f|H!$G-o)=InHx|i(KL| zSGdYGu5*K%+~PKOxXV56^MHpu;xSKn$}^txf|tDFHE(#!JKpnwk9^`YU--&5zVm~h z{Ngu%_{+bgT>lY(fCM5iK?q7Pf)j#}gd#Ly2unD^6M=|CA~I2kN;IMqgP6o3HgSkc zJmQmpgd`#{Nk~dEl9Pgzq#`wGNJ~1>lYxw6A~RXYN;a~SgPi0dH+jfQKJrt5f)t`K zMJP%!ic^A;l%h0cC`&oYQ-O+9qB2#eN;RregPPQ$Hg%{=J?hhdhBTrvO=wCpn$v=o zw4ya_XiGcV(}9k3qBC9SN;kUGgP!!FH+|?!Kl(F(fed0WLm0|1hBJbZjAArn7|S@u zGl7XrVlq>h$~2}kgPF`?HglNEJm#~2g)Cw*OIXS>ma~GDtYS55Sj#%rvw@9lVl!LV z$~LyMgPrVRH+$I2KK65fgB;>8M>xtcj&p*OoZ>WRILkTCbAgLo;xbpb$~CTYgPYvq zHg~woJ?`^>hdkmjPk72Rp7Vm2yy7))c*{H9^MQ|i;xk|P$~V6AgP;83H-GrczlD7N z5rBXMA}~P+N-%;Gf{=tFG+_u!IKmTwh(sbXQHV-3q7#Fd#3D9vh)X=;lYoRIA~8uw zN-~m@f|R5pHEBpoI?|JYjASA+S;$H@vXg_HI4f|8V? zG-W7DIm%Okid3R9Rj5ies#AlS)S@Q6^rAO?=u1EPGk}2%VlYD($}omAf{~13G-DXcIL0%9iA-WLQ<%y$ zrZa|!^2 z*vmflbAW>!;xI=z$}x^}f|H!$G-o)=InHx|i(KL|SGdYGu5*K%+~PKOxXV56^MHpu z;xSKn$}^txf|tDFHE(#!JKpnwk9^`YU--&5zVm~h{Ngu%_{+a#oc|GkfCM5iK?q7P zf)j#}gd#Ly2unD^6M=|CA~I2kN;IMqgP6o3HgSkcJmQmpgd`#{Nk~dEl9Pgzq#`wG zNJ~1>lYxw6A~RXYN;a~SgPi0dH+jfQKJrt5f)t`KMJP%!ic^A;l%h0cC`&oYQ-O+9 zqB2#eN;RregPPQ$Hg%{=J?hhdhBTrvO=wCpn$v=ow4ya_XiGcV(}9k3qBC9SN;kUG zgP!!FH+|?!Kl(F(fed0WLm0|1hBJbZjAArn7|S@uGl7XrVlq>h$~2}kgPF`?HglNE zJm#~2g)Cw*OIXS>ma~GDtYS55Sj#%rvw@9lVl!LV$~LyMgPrVRH+$I2KK65fgB;>8 zM>xtcj&p*OoZ>WRILkTCbAgLo;xbpb$~CTYgPYvqHg~woJ?`^>hdkmjPk72Rp7Vm2 zyy7))c*{H9^MQ|i;xk|P$~V6AgP;83H-GrczeT+N5rBXMA}~P+N-%;Gf{=tFG+_u! 
zIKmTwh(sbXQHV-3q7#Fd#3D9vh)X=;lYoRIA~8uwN-~m@f|R5pHEBpoI?|JYjASA+ zS;$H@vXg_HI4f|8V?G-W7DIm%Okid3R9Rj5ies#AlS z)S@Q6^rAO?=u1EP zGk}2%VlYD($}omAf{~13G-DXcIL0%9iA-WLQ<%y$rZa|!^2*vmflbAW>!;xI=z$}x^}f|H!$ zG-o)=InHx|i(KL|SGdYGu5*K%+~PKOxXV56^MHpu;xSKn$}^txf|tDFHE(#!JKpnw zk9^`YU--&5zVm~h{Ngu%_{+Z~-2V}PfCM5iK?q7Pf)j#}gd#Ly2unD^6M=|CA~I2k zN;IMqgP6o3HgSkcJmQmpgd`#{Nk~dEl9Pgzq#`wGNJ~1>lYxw6A~RXYN;a~SgPi0d zH+jfQKJrt5f)t`KMJP%!ic^A;l%h0cC`&oYQ-O+9qB2#eN;RregPPQ$Hg%{=J?hhd zhBTrvO=wCpn$v=ow4ya_XiGcV(}9k3qBC9SN;kUGgP!!FH+|?!Kl(F(fed0WLm0|1 zhBJbZjAArn7|S@uGl7XrVlq>h$~2}kgPF`?HglNEJm#~2g)Cw*OIXS>ma~GDtYS55 zSj#%rvw@9lVl!LV$~LyMgPrVRH+$I2KK65fgB;>8M>xtcj&p*OoZ>WRILkTCbAgLo z;xbpb$~CTYgPYvqHg~woJ?`^>hdkmjPk72Rp7Vm2yy7))c*{H9^MQ|i;xk|P$~V6A zgP;83H-Grc|Kh2C0ub>37^a78F%SR%V11q1wtj8fwr$(CZQHhO+qP|+lSPgYf)JEo z1SbR`2}Nka5SDO+Cjt?PL}a26m1smK1~G|6Y~m1?c*G|G0VE_5iAh3Il98Mgq$Cxo zNkdxFk)8}>Bomp*LRPYoogCyO7rDtpUhrl%y1;DMMMxQJxA^ zq!N{>LRG3!of_1n7PYBEUFuPv1~jA*jcGztn$esVw4@cSX+vAu(Vh-;q!XR#LRY%c zogVb07rp62U;5FX0SsgigBik5hB2HGjARs}8N*n{F`fxbWD=8^!c?X)of*tz7PFbd zT;?&K1uSF{i&?@_ma&`_tYj6dS;Jb^v7QZVWD}d&!dAAiogM6C7rWWRUiPt{103WK zhdIJgj&Yn5oa7XzIm21bah?lYUG8z82R!5vk9opVp7ER) zyyO+HdBa=Y@tzNSs7?)P zQj6Nup)U2PPXij#h{iObDa~k33tG~O*0iB5?PyO2I?{>GbfGKV=uQuM(u>~op)dXD z&j1E8h`|hDD8m@e2u3oB(Trg%;~38bCNhc1Okpb1n9dAlGK<;FVJ`ES&jJ>*h{Y^n zDa%;S3Rbd;)vRGH>sZeQHnNG$Y+)*>T;VF$xXul3a*NyC;V$>M&jTLvh{rtPDbIM$3tsYy*Sz5^?|9D# zKJtmreBmqK_|6Z0@{8a6;V=IPl+b?y^B+M7N-%;Gf{=tFG+_u!IKmTwh(sbXQHV-3 zq7#Fd#3D9vh)X=;lYjscl8D44At}j7P6|?ziqxbbE$K*41~QU~%w!=e*~m@~a*~VO zDP6JlYEp~Z)S)i*s80hL z(ul@1p()L1P77Mniq^EDE$wJe2RhP;&UB$G-RMpadeV#D^r0{P=+6KKGKj$pVJO2G z&Im>_iqVW=EaMo@1ST?x$xLA?)0oZ-W-^P}%waC`n9l+hvWUejVJXX4&I(qtiq))P zE$dj%1~#&Z&1_*S+t|(ycCw4z>|rna*v|nDa)`qm;V8#A&IwL(iqo9oEay1S1uk-l z%Ut0q*SO9NZgPv;+~F?wxX%L~@`%Sg;VI8}&I?}hir2j1E$?{G2R`zN&wSx4-}ufC ze)5ao{NXSE2$aZw0`nh12ud)56M~S0A~azLOE|(4frvyRGEs<1G@=uOn8YGBafnMi z;*)>?5|W6-Bq1ruNKOh;l8V%%AuZ`hPX;oQiOggnE7{0S4sw!<+~grI`N&TJ3Q~x| z6rm`^C{77VQi{@)p)BPnPX#JciON)=D%Ge?4Qf)0+SH*g^{7t+8q$cyG@&WYXif`S z(u&r!p)KubPX{{EiOzJPE8XZ$4|>vz-t?g_{pimC1~Q1j3}Gn47|sYrGK$fRVJzbq z&jcniiOEc1D$|(G3}!Nm+00=s^O(;97P5%NEMY0jSk4MovWnHLVJ+)e&jvQKiOp#VAe*N>Yl_ zl%Xu;C{G0{Qi;k`p(@p=P7P{Oi`vwoF7>ES0~*qZ#x$WR&1g;wTGEQvw4p8SXio<^ z(uvM=p)1|!P7iw0i{A91Fa7Ax00uIM!3<$2!x+v8Mly=gjA1O}7|#SIGKtAdVJg#@ z&J1QUi`mR!F7uer0v57}#Vlbd%UI3|Rs7?)PQj6Nup)U2PPXij#h{iObDa~k33tG~O*0iB5?PyO2I?{>GbfGKV=uQuM z(u>~op)dXD&j1E8h`|hDD8m@e2u3oB(Trg%;~38bCNhc1Okpb1n9dAlGK<;FVJ`ES z&jJ>*h{Y^nDa%;S3Rbd;)vRGH>sZeQHnNG$Y+)*>T;VF$xXul3a*NyC;V$>M&jTLvh{rtPDbIM$3tsYy z*Sz5^?|9D#KJtmreBmqK_|6Z0@{8a6;V=IPl+=F$^B+M7N-%;Gf{=tFG+_u!IKmTw zh(sbXQHV-3q7#Fd#3D9vh)X=;lYjscl8D44At}j7P6|?ziqxbbE$K*41~QU~%w!=e z*~m@~a*~VODP6JlYEp~Z z)S)i*s80hL(ul@1p()L1P77Mniq^EDE$wJe2RhP;&UB$G-RMpadeV#D^r0{P=+6KK zGKj$pVJO2G&Im>_iqVW=EaMo@1ST?x$xLA?)0oZ-W-^P}%waC`n9l+hvWUejVJXX4 z&I(qtiq))PE$dj%1~#&Z&1_*S+t|(ycCw4z>|rna*v|nDa)`qm;V8#A&IwL(iqo9o zEay1S1uk-l%Ut0q*SO9NZgPv;+~F?wxX%L~@`%Sg;VI8}&I?}hir2j1E$?{G2R`zN z&wSx4-}ufCe)5ao{NXSE2$al!0`nh12ud)56M~S0A~azLOE|(4frvyRGEs<1G@=uO zn8YGBafnMi;*)>?5|W6-Bq1ruNKOh;l8V%%AuZ`hPX;oQiOggnE7{0S4sw!<+~grI z`N&TJ3Q~x|6rm`^C{77VQi{@)p)BPnPX#JciON)=D%Ge?4Qf)0+SH*g^{7t+8q$cy zG@&WYXif`S(u&r!p)KubPX{{EiOzJPE8XZ$4|>vz-t?g_{pimC1~Q1j3}Gn47|sYr zGK$fRVJzbq&jcniiOEc1D$|(G3}!Nm+00=s^O(;97P5%NEMY0jSk4MovWnHLVJ+)e z&jvQKiOp z#VAe*N>Yl_l%Xu;C{G0{Qi;k`p(@p=P7P{Oi`vwoF7>ES0~*qZ#x$WR&1g;wTGEQv zw4p8SXio<^(uvM=p)1|!P7iw0i{A91Fa7Ax00uIM!3<$2!x+v8Mly=gjA1O}7|#SI zGKtAdVJg#@&J1QUi`mR!F7uer0v57}#Vlbd%UI3|Rs7?)PQj6Nup)U2PPXij#h{iObDa~k33tG~O*0iB5?PyO2I?{>G 
zbfGKV=uQuM(u>~op)dXD&j1E8h`|hDD8m@e2u3oB(Trg%;~38bCNhc1Okpb1n9dAl zGK<;FVJ`ES&jJ>*h{Y^nDa%;S3Rbd;)vRGH>sZeQHnNG$Y+)*>T;VF$xXul3a*NyC;V$>M&jTLvh{rtP zDbIM$3tsYy*Sz5^?|9D#KJtmreBmqK_|6Z0@{8a6;V=IPl+u3!^B+M7N-%;Gf{=tF zG+_u!IKmTwh(sbXQHV-3q7#Fd#3D9vh)X=;lYjscl8D44At}j7P6|?ziqxbbE$K*4 z1~QU~%w!=e*~m@~a*~VODP6JlYEp~Z)S)i*s80hL(ul@1p()L1P77Mniq^EDE$wJe2RhP;&UB$G-RMpadeV#D z^r0{P=+6KKGKj$pVJO2G&Im>_iqVW=EaMo@1ST?x$xLA?)0oZ-W-^P}%waC`n9l+h zvWUejVJXX4&I(qtiq))PE$dj%1~#&Z&1_*S+t|(ycCw4z>|rna*v|nDa)`qm;V8#A z&IwL(iqo9oEay1S1uk-l%Ut0q*SO9NZgPv;+~F?wxX%L~@`%Sg;VI8}&I?}hir2j1 zE$?{G2R`zN&wSx4-}ufCe)5ao{NXSE2$afy0`nh12ud)56M~S0A~azLOE|(4frvyR zGEs<1G@=uOn8YGBafnMi;*)>?5|W6-Bq1ruNKOh;l8V%%AuZ`hPX;oQiOggnE7{0S z4sw!<+~grI`N&TJ3Q~x|6rm`^C{77VQi{@)p)BPnPX#JciON)=D%Ge?4Qf)0+SH*g z^{7t+8q$cyG@&WYXif`S(u&r!p)KubPX{{EiOzJPE8XZ$4|>vz-t?g_{pimC1~Q1j z3}Gn47|sYrGK$fRVJzbq&jcniiOEc1D$|(G3}!Nm+00=s^O(;97P5%NEMY0jSk4Mo zvWnHLVJ+)e&jvQKiOp#VAe*N>Yl_l%Xu;C{G0{Qi;k`p(@p=P7P{Oi`vwoF7>ES0~*qZ#x$WR z&1g;wTGEQvw4p8SXio<^(uvM=p)1|!P7iw0i{A91Fa7Ax00uIM!3<$2!x+v8Mly=g zjA1O}7|#SIGKtAdVJg#@&J1QUi`mR!F7uer0v57}#Vlbd%UI3|Rs7?)PQj6Nup)U2PPXij#h{iObDa~k33tG~O*0iB5 z?PyO2I?{>GbfGKV=uQuM(u>~op)dXD&j1E8h`|hDD8m@e2u3oB(Trg%;~38bCNhc1 zOkpb1n9dAlGK<;FVJ`ES&jJ>*h{Y^nDa%;S3Rbd;)vRGH>sZeQHnNG$Y+)*>T;VF$xXul3a*NyC;V$>M z&jTLvh{rtPDbIM$3tsYy*Sz5^?|9D#KJtmreBmqK_|6Z0@{8a6;V=IPl-7R&^B+M7 zN-%;Gf{=tFG+_u!IKmTwh(sbXQHV-3q7#Fd#3D9vh)X=;lYjscl8D44At}j7P6|?z ziqxbbE$K*41~QU~%w!=e*~m@~a*~VODP6JlYEp~Z)S)i*s80hL(ul@1p()L1P77Mniq^EDE$wJe2RhP;&UB$G z-RMpadeV#D^r0{P=+6KKGKj$pVJO2G&Im>_iqVW=EaMo@1ST?x$xLA?)0oZ-W-^P} z%waC`n9l+hvWUejVJXX4&I(qtiq))PE$dj%1~#&Z&1_*S+t|(ycCw4z>|rna*v|nD za)`qm;V8#A&IwL(iqo9oEay1S1uk-l%Ut0q*SO9NZgPv;+~F?wxX%L~@`%Sg;VI8} z&I?}hir2j1E$?{G2R`zN&wSx4-}ufCe)5ao{NXSE2$ar$0`nh12ud)56M~S0A~azL zOE|(4frvyRGEs<1G@=uOn8YGBafnMi;*)>?5|W6-Bq1ruNKOh;l8V%%AuZ`hPX;oQ ziOggnE7{0S4sw!<+~grI`N&TJ3Q~x|6rm`^C{77VQi{@)p)BPnPX#JciON)=D%Ge? z4Qf)0+SH*g^{7t+8q$cyG@&WYXif`S(u&r!p)KubPX{{EiOzJPE8XZ$4|>vz-t?g_ z{pimC1~Q1j3}Gn47|sYrGK$fRVJzbq&jcniiOEc1D$|(G3}!Nm+00=s^O(;97P5%N zEMY0jSk4MovWnHLVJ+)e&jvQKiOp#VAe*N>Yl_l%Xu;C{G0{Qi;k`p(@p=P7P{Oi`vwoF7>ES z0~*qZ#x$WR&1g;wTGEQvw4p8SXio<^(uvM=p)1|!P7iw0i{A91Fa7Ax00uIM!3<$2 z!x+v8Mly=gjA1O}7|#SIGKtAdVJg#@&J1QUi`mR!F7uer0v57}#Vlbd%UI3|Rs7?)PQj6Nup)U2PPXij#h{iObDa~k3 z3tG~O*0iB5?PyO2I?{>GbfGKV=uQuM(u>~op)dXD&j1E8h`|hDD8m@e2u3oB(Trg% z;~38bCNhc1Okpb1n9dAlGK<;FVJ`ES&jJ>*h{Y^nDa%;S3Rbd;)vRGH>sZeQHnNG$ zY+)*>T;VF$xXul3 za*NyC;V$>M&jTLvh{rtPDbIM$3tsYy*Sz5^?|9D#KJtmreBmqK_|6Z0@{8a6;V=IP zl+k|z^B+M7N-%;Gf{=tFG+_u!IKmTwh(sbXQHV-3q7#Fd#3D9vh)X=;lYjscl8D44 zAt}j7P6|?ziqxbbE$K*41~QU~%w!=e*~m@~a*~VODP6JlYEp~Z)S)i*s80hL(ul@1p()L1P77Mniq^EDE$wJe z2RhP;&UB$G-RMpadeV#D^r0{P=+6KKGKj$pVJO2G&Im>_iqVW=EaMo@1ST?x$xLA? 
z)0oZ-W-^P}%waC`n9l+hvWUejVJXX4&I(qtiq))PE$dj%1~#&Z&1_*S+t|(ycCw4z z>|rna*v|nDa)`qm;V8#A&IwL(iqo9oEay1S1uk-l%Ut0q*SO9NZgPv;+~F?wxX%L~ z@`%Sg;VI8}&I?}hir2j1E$?{G2R`zN&wSx4-}ufCe)5ao{NXSE2$acx0`nh12ud)5 z6M~S0A~azLOE|(4frvyRGEs<1G@=uOn8YGBafnMi;*)>?5|W6-Bq1ruNKOh;l8V%% zAuZ`hPX;oQiOggnE7{0S4sw!<+~grI`N&TJ3Q~x|6rm`^C{77VQi{@)p)BPnPX#Jc ziON)=D%Ge?4Qf)0+SH*g^{7t+8q$cyG@&WYXif`S(u&r!p)KubPX{{EiOzJPE8XZ$ z4|>vz-t?g_{pimC1~Q1j3}Gn47|sYrGK$fRVJzbq&jcniiOEc1D$|(G3}!Nm+00=s z^O(;97P5%NEMY0jSk4MovWnHLVJ+)e&jvQKiOp#VAe*N>Yl_l%Xu;C{G0{Qi;k`p(@p=P7P{O zi`vwoF7>ES0~*qZ#x$WR&1g;wTGEQvw4p8SXio<^(uvM=p)1|!P7iw0i{A91Fa7Ax z00uIM!3<$2!x+v8Mly=gjA1O}7|#SIGKtAdVJg#@&J1QUi`mR!F7uer0v57}#Vlbd z%UI3|Rs7?)PQj6Nup)U2PPXij# zh{iObDa~k33tG~O*0iB5?PyO2I?{>GbfGKV=uQuM(u>~op)dXD&j1E8h`|hDD8m@e z2u3oB(Trg%;~38bCNhc1Okpb1n9dAlGK<;FVJ`ES&jJ>*h{Y^nDa%;S3Rbd;)vRGH z>sZeQHnNG$Y+)*> zT;VF$xXul3a*NyC;V$>M&jTLvh{rtPDbIM$3tsYy*Sz5^?|9D#KJtmreBmqK_|6Z0 z@{8a6;V=IPl+}L%^B+M7N-%;Gf{=tFG+_u!IKmTwh(sbXQHV-3q7#Fd#3D9vh)X=; zlYjscl8D44At}j7P6|?ziqxbbE$K*41~QU~%w!=e*~m@~a*~VODP6JlYEp~Z)S)i*s80hL(ul@1p()L1P77Mn ziq^EDE$wJe2RhP;&UB$G-RMpadeV#D^r0{P=+6KKGKj$pVJO2G&Im>_iqVW=EaMo@ z1ST?x$xLA?)0oZ-W-^P}%waC`n9l+hvWUejVJXX4&I(qtiq))PE$dj%1~#&Z&1_*S z+t|(ycCw4z>|rna*v|nDa)`qm;V8#A&IwL(iqo9oEay1S1uk-l%Ut0q*SO9NZgPv; z+~F?wxX%L~@`%Sg;VI8}&I?}hir2j1E$?{G2R`zN&wSx4-}ufCe)5ao{NXSE2$ao# z0`nh12ud)56M~S0A~azLOE|(4frvyRGEs<1G@=uOn8YGBafnMi;*)>?5|W6-Bq1ru zNKOh;l8V%%AuZ`hPX;oQiOggnE7{0S4sw!<+~grI`N&TJ3Q~x|6rm`^C{77VQi{@) zp)BPnPX#JciON)=D%Ge?4Qf)0+SH*g^{7t+8q$cyG@&WYXif`S(u&r!p)KubPX{{E ziOzJPE8XZ$4|>vz-t?g_{pimC1~Q1j3}Gn47|sYrGK$fRVJzbq&jcniiOEc1D$|(G z3}!Nm+00=s^O(;97P5%NEMY0jSk4MovWnHLVJ+)e&jvQKiOp#VAe*N>Yl_l%Xu;C{G0{Qi;k` zp(@p=P7P{Oi`vwoF7>ES0~*qZ#x$WR&1g;wTGEQvw4p8SXio<^(uvM=p)1|!P7iw0 zi{A91Fa7Ax00uIM!3<$2!x+v8Mly=gjA1O}7|#SIGKtAdVJg#@&J1QUi`mR!F7uer z0v57}#Vlbd%UI3|Rs7?)PQj6Nu zp)U2PPXij#h{iObDa~k33tG~O*0iB5?PyO2I?{>GbfGKV=uQuM(u>~op)dXD&j1E8 zh`|hDD8m@e2u3oB(Trg%;~38bCNhc1Okpb1n9dAlGK<;FVJ`ES&jJ>*h{Y^nDa%;S z3Rbd;)vRGH>sZeQHnNG$Y+)*>T;VF$xXul3a*NyC;V$>M&jTLvh{rtPDbIM$3tsYy*Sz5^?|9D#KJtmr zeBmqK_|6Z0@{8a6;V=IPl+%9#^B+M7N-%;Gf{=tFG+_u!IKmTwh(sbXQHV-3q7#Fd z#3D9vh)X=;lYjscl8D44At}j7P6|?ziqxbbE$K*41~QU~%w!=e*~m@~a*~VODP6JlYEp~Z)S)i*s80hL(ul@1 zp()L1P77Mniq^EDE$wJe2RhP;&UB$G-RMpadeV#D^r0{P=+6KKGKj$pVJO2G&Im>_ ziqVW=EaMo@1ST?x$xLA?)0oZ-W-^P}%waC`n9l+hvWUejVJXX4&I(qtiq))PE$dj% z1~#&Z&1_*S+t|(ycCw4z>|rna*v|nDa)`qm;V8#A&IwL(iqo9oEay1S1uk-l%Ut0q z*SO9NZgPv;+~F?wxX%L~@`%Sg;VI8}&I?}hir2j1E$?{G2R`zN&wSx4-}ufCe)5ao z{NXSE2$aiz0`nh12ud)56M~S0A~azLOE|(4frvyRGEs<1G@=uOn8YGBafnMi;*)>? z5|W6-Bq1ruNKOh;l8V%%AuZ`hPX;oQiOggnE7{0S4sw!<+~grI`N&TJ3Q~x|6rm`^ zC{77VQi{@)p)BPnPX#JciON)=D%Ge?4Qf)0+SH*g^{7t+8q$cyG@&WYXif`S(u&r! 
zM~D7H|Dpeg?SD+@KlC5^kJ$dlhWCWboz!UPBtAcF4zm=yK^VGj`Y01>(eC>J$3^dI^U{YPy7Q$qit z|ImNL_CGcBANmjdM{NJoLjR%v(0|1CKRxsx`Vaj_Z2vPt|DpfTf5i4bGxQ(&5B*1M z|Fc5>q5sf-#P&Zs^dI^U{YPy7b3*^2|ImNL_MaKi{eSV5@BA|QYbl8x+9oG_Q7-C> zuY5o1+h2`O?3(lsQD2n%f4~1})VI<~%YEm&-}&(mqQ2`D-;es*cmD5ZeevIa^{wLk zZ+`ZJ^1u99)Q`&jDl;nT+vTGE=i~EB%Kd*Y`RNy5`TVbat9axOzg1N5%^%#%|K{fx qmH+&r&mZ3||3AO}_s`$T8vVo1pB?p!AAdJ0%8#gQVk^Y`^#1^ckVs+x literal 0 HcmV?d00001 From 89e8ce7e7543bff7c780203322cec320564afadf Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 20 Oct 2022 13:32:18 -0700 Subject: [PATCH 073/162] Implement C++ JNI for `ParquetChunkedReader` Signed-off-by: Nghia Truong --- java/src/main/native/CMakeLists.txt | 1 + java/src/main/native/src/ChunkedReaderJni.cpp | 155 ++++++++++++++++++ 2 files changed, 156 insertions(+) create mode 100644 java/src/main/native/src/ChunkedReaderJni.cpp diff --git a/java/src/main/native/CMakeLists.txt b/java/src/main/native/CMakeLists.txt index 339f0f439a0..ac05b16b39a 100755 --- a/java/src/main/native/CMakeLists.txt +++ b/java/src/main/native/CMakeLists.txt @@ -130,6 +130,7 @@ add_library( cudfjni src/Aggregation128UtilsJni.cpp src/AggregationJni.cpp + src/ChunkedReaderJni.cpp src/CudfJni.cpp src/CudaJni.cpp src/ColumnVectorJni.cpp diff --git a/java/src/main/native/src/ChunkedReaderJni.cpp b/java/src/main/native/src/ChunkedReaderJni.cpp new file mode 100644 index 00000000000..648e50596df --- /dev/null +++ b/java/src/main/native/src/ChunkedReaderJni.cpp @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +//=================================================================== +// +// TODO: cleanup header + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cudf_jni_apis.hpp" +#include "dtype_utils.hpp" +#include "jni_compiled_expr.hpp" +#include "jni_utils.hpp" +#include "row_conversion.hpp" + +// TODO: cleanup this +namespace cudf::jni { +jlongArray convert_table_for_return(JNIEnv *env, std::unique_ptr &&table_result, + std::vector> &&extra_columns); +} +using cudf::jni::release_as_jlong; + +// This file is for the code releated to chunked reader (Parquet, ORC, etc.). + +extern "C" { + +// This function should take all the parameters that `Table.readParquet` takes, +// plus one more parameter `long chunkSizeByteLimit`. 
+JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ParquetChunkedReader_create( + JNIEnv *env, jclass, jlong chunk_size_byte_limit, jobjectArray filter_col_names, + jbooleanArray j_col_binary_read, jstring inputfilepath, jlong buffer, jlong buffer_length, + jint unit) { + + JNI_NULL_CHECK(env, j_col_binary_read, "null col_binary_read", 0); + bool read_buffer = true; + if (buffer == 0) { + JNI_NULL_CHECK(env, inputfilepath, "input file or buffer must be supplied", NULL); + read_buffer = false; + } else if (inputfilepath != NULL) { + JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", + "cannot pass in both a buffer and an inputfilepath", NULL); + } else if (buffer_length <= 0) { + JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", "An empty buffer is not supported", + NULL); + } + + try { + cudf::jni::auto_set_device(env); + cudf::jni::native_jstring filename(env, inputfilepath); + if (!read_buffer && filename.is_empty()) { + JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", "inputfilepath can't be empty", + NULL); + } + + cudf::jni::native_jstringArray n_filter_col_names(env, filter_col_names); + cudf::jni::native_jbooleanArray n_col_binary_read(env, j_col_binary_read); + + auto const source = read_buffer ? + cudf::io::source_info(reinterpret_cast(buffer), + static_cast(buffer_length)) : + cudf::io::source_info(filename.get()); + + auto builder = cudf::io::chunked_parquet_reader_options::builder(source); + if (n_filter_col_names.size() > 0) { + builder = builder.columns(n_filter_col_names.as_cpp_vector()); + } + + auto const read_opts = builder.convert_strings_to_categories(false) + .timestamp_type(cudf::data_type(static_cast(unit))) + .byte_limit(chunk_size_byte_limit) + .build(); + return reinterpret_cast(new cudf::io::chunked_parquet_reader(read_opts)); + } + CATCH_STD(env, NULL); +} + +JNIEXPORT jboolean JNICALL Java_ai_rapids_cudf_ParquetChunkedReader_hasNext(JNIEnv *env, jclass, + jlong handle) { + JNI_NULL_CHECK(env, handle, "handle is null", nullptr); + + try { + cudf::jni::auto_set_device(env); + auto const reader_ptr = reinterpret_cast(handle); + return reader_ptr->has_next(); + } + CATCH_STD(env, nullptr); +} + +JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ParquetChunkedReader_readChunk(JNIEnv *env, jclass, + jlong handle) { + JNI_NULL_CHECK(env, handle, "handle is null", nullptr); + + try { + cudf::jni::auto_set_device(env); + auto const reader_ptr = reinterpret_cast(handle); + auto chunk = reader_ptr->read_chunk(); + return chunk.tbl ? 
cudf::jni::convert_table_for_return(env, chunk.tbl) : nullptr; + } + CATCH_STD(env, nullptr); +} + +JNIEXPORT void JNICALL Java_ai_rapids_cudf_ParquetChunkedReader_close(JNIEnv *env, jclass, + jlong handle) { + JNI_NULL_CHECK(env, handle, "handle is null", ); + + try { + cudf::jni::auto_set_device(env); + delete reinterpret_cast(handle); + } + CATCH_STD(env, nullptr); +} + +} // extern "C" From 50e1a8127ed671a7ff288984a3be359d998f670a Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 20 Oct 2022 14:47:29 -0700 Subject: [PATCH 074/162] Fix off-by-one memory access bug --- cpp/src/io/parquet/reader_preprocess.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/io/parquet/reader_preprocess.cu b/cpp/src/io/parquet/reader_preprocess.cu index 32967b255ac..e9d48ea30e3 100644 --- a/cpp/src/io/parquet/reader_preprocess.cu +++ b/cpp/src/io/parquet/reader_preprocess.cu @@ -206,7 +206,7 @@ std::vector compute_splits(hostdevice_vector Date: Thu, 20 Oct 2022 15:11:50 -0700 Subject: [PATCH 075/162] Cleanup Signed-off-by: Nghia Truong --- java/src/main/native/src/ChunkedReaderJni.cpp | 41 +++++++++---------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/java/src/main/native/src/ChunkedReaderJni.cpp b/java/src/main/native/src/ChunkedReaderJni.cpp index 648e50596df..e5a1813ebfe 100644 --- a/java/src/main/native/src/ChunkedReaderJni.cpp +++ b/java/src/main/native/src/ChunkedReaderJni.cpp @@ -49,10 +49,11 @@ #include #include +#include "../include/jni_utils.hpp" + #include "cudf_jni_apis.hpp" #include "dtype_utils.hpp" #include "jni_compiled_expr.hpp" -#include "jni_utils.hpp" #include "row_conversion.hpp" // TODO: cleanup this @@ -72,26 +73,23 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ParquetChunkedReader_create( JNIEnv *env, jclass, jlong chunk_size_byte_limit, jobjectArray filter_col_names, jbooleanArray j_col_binary_read, jstring inputfilepath, jlong buffer, jlong buffer_length, jint unit) { - JNI_NULL_CHECK(env, j_col_binary_read, "null col_binary_read", 0); bool read_buffer = true; if (buffer == 0) { - JNI_NULL_CHECK(env, inputfilepath, "input file or buffer must be supplied", NULL); + JNI_NULL_CHECK(env, inputfilepath, "input file or buffer must be supplied", 0); read_buffer = false; - } else if (inputfilepath != NULL) { + } else if (inputfilepath != nullptr) { JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", - "cannot pass in both a buffer and an inputfilepath", NULL); + "cannot pass in both a buffer and an inputfilepath", 0); } else if (buffer_length <= 0) { - JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", "An empty buffer is not supported", - NULL); + JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", "An empty buffer is not supported", 0); } try { cudf::jni::auto_set_device(env); cudf::jni::native_jstring filename(env, inputfilepath); if (!read_buffer && filename.is_empty()) { - JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", "inputfilepath can't be empty", - NULL); + JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", "inputfilepath can't be empty", 0); } cudf::jni::native_jstringArray n_filter_col_names(env, filter_col_names); @@ -102,35 +100,36 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ParquetChunkedReader_create( static_cast(buffer_length)) : cudf::io::source_info(filename.get()); - auto builder = cudf::io::chunked_parquet_reader_options::builder(source); + // TODO: use builder + auto read_opts = cudf::io::chunked_parquet_reader_options::builder(source) + .byte_limit(chunk_size_byte_limit) + .build(); 
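+    // The builder above is finalized with only the byte limit; the remaining read options
+    // (column selection, string/category conversion, timestamp type) are applied below via
+    // setters on the already-built options object.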
if (n_filter_col_names.size() > 0) { - builder = builder.columns(n_filter_col_names.as_cpp_vector()); + read_opts.set_columns(n_filter_col_names.as_cpp_vector()); } + read_opts.enable_convert_strings_to_categories(false); + read_opts.set_timestamp_type(cudf::data_type(static_cast(unit))); - auto const read_opts = builder.convert_strings_to_categories(false) - .timestamp_type(cudf::data_type(static_cast(unit))) - .byte_limit(chunk_size_byte_limit) - .build(); return reinterpret_cast(new cudf::io::chunked_parquet_reader(read_opts)); } - CATCH_STD(env, NULL); + CATCH_STD(env, 0); } JNIEXPORT jboolean JNICALL Java_ai_rapids_cudf_ParquetChunkedReader_hasNext(JNIEnv *env, jclass, jlong handle) { - JNI_NULL_CHECK(env, handle, "handle is null", nullptr); + JNI_NULL_CHECK(env, handle, "handle is null", false); try { cudf::jni::auto_set_device(env); auto const reader_ptr = reinterpret_cast(handle); return reader_ptr->has_next(); } - CATCH_STD(env, nullptr); + CATCH_STD(env, false); } JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ParquetChunkedReader_readChunk(JNIEnv *env, jclass, jlong handle) { - JNI_NULL_CHECK(env, handle, "handle is null", nullptr); + JNI_NULL_CHECK(env, handle, "handle is null", 0); try { cudf::jni::auto_set_device(env); @@ -138,7 +137,7 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ParquetChunkedReader_readChunk( auto chunk = reader_ptr->read_chunk(); return chunk.tbl ? cudf::jni::convert_table_for_return(env, chunk.tbl) : nullptr; } - CATCH_STD(env, nullptr); + CATCH_STD(env, 0); } JNIEXPORT void JNICALL Java_ai_rapids_cudf_ParquetChunkedReader_close(JNIEnv *env, jclass, @@ -149,7 +148,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_ParquetChunkedReader_close(JNIEnv *en cudf::jni::auto_set_device(env); delete reinterpret_cast(handle); } - CATCH_STD(env, nullptr); + CATCH_STD(env, ); } } // extern "C" From 7cda8c2fc7283fb4edff5a87fd2d93996cc54a97 Mon Sep 17 00:00:00 2001 From: Dave Baranec Date: Thu, 20 Oct 2022 17:12:00 -0500 Subject: [PATCH 076/162] Fixed an issue with non-first reads in the chunked reader. Made an attempt to cleanup and make several pieces of logic less confusin: specifically where we compute first_row / num_rows to be read for each page, and the values/variables we pass to the precompute kernels. --- cpp/src/io/parquet/page_data.cu | 63 +++++++++++++------------ cpp/src/io/parquet/parquet_gpu.hpp | 1 + cpp/src/io/parquet/reader_preprocess.cu | 46 +++++++++++++++--- 3 files changed, 73 insertions(+), 37 deletions(-) diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index e8348605405..9b69aea78b0 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -848,8 +848,7 @@ static __device__ bool setupLocalPageInfo(page_state_s* const s, PageInfo const* p, device_span chunks, size_t min_row, - size_t num_rows, - int page_idx = 0) + size_t num_rows) { int t = threadIdx.x; int chunk_idx; @@ -945,18 +944,19 @@ static __device__ bool setupLocalPageInfo(page_state_s* const s, // NOTE: s->page.num_rows, s->col.chunk_row, s->first_row and s->num_rows will be // invalid/bogus during first pass of the preprocess step for nested types. this is ok // because we ignore these values in that stage. 
- { - // first row within the page to output - if (page_start_row >= min_row) { + { + auto const max_row = min_row + num_rows; + + // if we are totally outside the range of the input, do nothing + if((page_start_row > max_row) || (page_start_row + s->page.num_rows < min_row)){ s->first_row = 0; - } else { - s->first_row = (int32_t)min(min_row - page_start_row, (size_t)s->page.num_rows); + s->num_rows = 0; } - // # of rows within the page to output - s->num_rows = s->page.num_rows; - if ((page_start_row + s->first_row) + s->num_rows > min_row + num_rows) { - s->num_rows = - (int32_t)max((int64_t)(min_row + num_rows - (page_start_row + s->first_row)), INT64_C(0)); + // otherwise + else { + s->first_row = page_start_row >= min_row ? 0 : min_row - page_start_row; + auto const max_page_rows = s->page.num_rows - s->first_row; + s->num_rows = (page_start_row + s->first_row) + max_page_rows <= max_row ? max_page_rows : max_row - (page_start_row + s->first_row); } } @@ -1091,8 +1091,8 @@ static __device__ bool setupLocalPageInfo(page_state_s* const s, } else { s->input_value_count = 0; s->input_leaf_count = 0; - s->page.skipped_values = -1; - s->page.skipped_leaf_values = -1; + s->page.skipped_values = -1; // magic number to indicate it hasn't been set for use inside UpdatePageSizes + s->page.skipped_leaf_values = 0; } } @@ -1473,7 +1473,6 @@ static __device__ void gpuUpdatePageSizes(page_state_s* s, uint32_t const count_mask = ballot(in_nesting_bounds); if (!t) { s->page.nesting[s_idx].size += __popc(count_mask); - // printf("New size (%d): %d\n", s_idx, s->page.nesting[s_idx].size); } // string lengths, if applicable @@ -1484,8 +1483,6 @@ static __device__ void gpuUpdatePageSizes(page_state_s* s, if (is_new_leaf) { int const src_pos = input_leaf_count + __popc(warp_leaf_count_mask & ((1 << t) - 1)); auto const str_len = gpuGetStringSize(s, src_pos); - // printf("S(%d): len(%d), src_pos(%d), input_leaf_count(%d)\n", t, str_len, src_pos, - // input_leaf_count); return str_len; } return 0; @@ -1534,27 +1531,28 @@ __global__ void __launch_bounds__(block_size) device_span chunks, size_t min_row, size_t num_rows, - bool trim_pass) + bool compute_num_rows_pass, + bool compute_string_sizes) { __shared__ __align__(16) page_state_s state_g; page_state_s* const s = &state_g; - int page_idx = blockIdx.x; + int page_idx = blockIdx.x; int t = threadIdx.x; PageInfo* pp = &pages[page_idx]; - if (!setupLocalPageInfo(s, pp, chunks, trim_pass ? min_row : 0, trim_pass ? num_rows : INT_MAX, page_idx)) { + if (!setupLocalPageInfo(s, pp, chunks, min_row, num_rows)) { return; } // we only need to preprocess hierarchies with repetition in them (ie, hierarchies // containing lists anywhere within). bool const has_repetition = chunks[pp->chunk_idx].max_level[level_type::REPETITION] > 0; - bool const is_string_column = (s->col.data_type & 7) == BYTE_ARRAY && s->dtype_len != 4; + compute_string_sizes = compute_string_sizes && ((s->col.data_type & 7) == BYTE_ARRAY && s->dtype_len != 4); // if this is a flat hierarchy (no lists) and is not a string column, compute the size directly // from the number of values. - if (!has_repetition && !is_string_column) { + if (!has_repetition && !compute_string_sizes) { if (!t) { // note: doing this for all nesting level because we can still have structs even if we don't // have lists. 
@@ -1563,7 +1561,7 @@ __global__ void __launch_bounds__(block_size) } } return; - } + } // zero sizes int d = 0; @@ -1573,13 +1571,13 @@ __global__ void __launch_bounds__(block_size) } if (!t) { s->page.skipped_values = -1; - s->page.skipped_leaf_values = -1; + s->page.skipped_leaf_values = 0; s->page.str_bytes = 0; s->input_row_count = 0; s->input_value_count = 0; - // if this isn't the trim pass, make sure we visit absolutely everything - if (!trim_pass) { + // if we're computing the number of rows, make sure we visit absolutely everything + if (compute_num_rows_pass) { s->first_row = 0; s->num_rows = INT_MAX; s->row_index_lower_bound = -1; @@ -1610,7 +1608,7 @@ __global__ void __launch_bounds__(block_size) actual_input_count = min(actual_input_count, s->num_input_values); // process what we got back - if (is_string_column) { + if (compute_string_sizes) { auto src_target_pos = target_input_count; // TODO: compute this in another warp like the decode step does if (s->dict_base) { @@ -1620,9 +1618,9 @@ __global__ void __launch_bounds__(block_size) } if (!t) { *(volatile int32_t*)&s->dict_pos = src_target_pos; } - gpuUpdatePageSizes(s, actual_input_count, t, trim_pass); + gpuUpdatePageSizes(s, actual_input_count, t, !compute_num_rows_pass); } else { - gpuUpdatePageSizes(s, actual_input_count, t, trim_pass); + gpuUpdatePageSizes(s, actual_input_count, t, !compute_num_rows_pass); } // target_input_count = actual_input_count + batch_size; @@ -1632,7 +1630,9 @@ __global__ void __launch_bounds__(block_size) } // update # rows in the actual page if (!t) { - pp->num_rows = s->page.nesting[0].size; + if(compute_num_rows_pass){ + pp->num_rows = s->page.nesting[0].size; + } pp->skipped_values = s->page.skipped_values; pp->skipped_leaf_values = s->page.skipped_leaf_values; pp->str_bytes = s->page.str_bytes; @@ -1786,6 +1786,7 @@ void ComputePageSizes(hostdevice_vector& pages, size_t min_row, size_t num_rows, bool trim_pass, + bool compute_string_sizes, rmm::cuda_stream_view stream) { dim3 dim_block(block_size, 1); @@ -1797,7 +1798,7 @@ void ComputePageSizes(hostdevice_vector& pages, // If uses_custom_row_bounds is set to true, we have to do a second pass later that "trims" // the starting and ending read values to account for these bounds. 
gpuComputePageSizes<<>>( - pages.device_ptr(), chunks, min_row, num_rows, trim_pass); + pages.device_ptr(), chunks, min_row, num_rows, trim_pass, compute_string_sizes); } /** diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index 6a67f510dea..d749b1df044 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -441,6 +441,7 @@ void ComputePageSizes(hostdevice_vector& pages, size_t num_rows, size_t min_row, bool trim_pass, + bool compute_string_sizes, rmm::cuda_stream_view stream); /** diff --git a/cpp/src/io/parquet/reader_preprocess.cu b/cpp/src/io/parquet/reader_preprocess.cu index 32967b255ac..98aa029b8ce 100644 --- a/cpp/src/io/parquet/reader_preprocess.cu +++ b/cpp/src/io/parquet/reader_preprocess.cu @@ -33,6 +33,30 @@ namespace cudf::io::detail::parquet { using namespace cudf::io::parquet; using namespace cudf::io; +void print_pages(hostdevice_vector& pages, rmm::cuda_stream_view _stream) +{ + pages.device_to_host(_stream, true); + for(size_t idx=0; idx& chunks, rmm::cuda_stream_view _stream) +{ + chunks.device_to_host(_stream, true); + for(size_t idx=0; idx& c auto const will_trim_later = uses_custom_row_bounds || chunked_read_size > 0; gpu::ComputePageSizes(pages, chunks, - will_trim_later ? min_row : 0, - will_trim_later ? num_rows : INT_MAX, - !will_trim_later, - _stream); + 0, + INT_MAX, + true, // compute num_rows + chunked_read_size > 0, // compute string sizes + _stream); // computes: // PageInfo::chunk_row for all pages @@ -532,6 +557,8 @@ void reader::impl::preprocess_columns(hostdevice_vector& c // retrieve pages back pages.device_to_host(_stream, true); + + //print_pages(pages, _stream); } // compute splits if necessary. @@ -554,8 +581,15 @@ void reader::impl::allocate_columns(hostdevice_vector& chu // It is only necessary to do this second pass if uses_custom_row_bounds is set (if the user has // specified artifical bounds). if (uses_custom_row_bounds) { - gpu::ComputePageSizes(pages, chunks, min_row, num_rows, true, _stream); - } + gpu::ComputePageSizes(pages, + chunks, + min_row, + num_rows, + false, // num_rows is already computed + false, // no need to compute string sizes + _stream); + //print_pages(pages, _stream); + } // iterate over all input columns and allocate any associated output // buffers if they are not part of a list hierarchy. 
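For orientation, the loop that drives the chunked reader from C++ looks roughly as follows. This is a minimal sketch assembled only from the calls visible in ChunkedReaderJni.cpp above; the include path and exact signatures are assumptions and may differ in later commits of this series.

    #include <cudf/io/parquet.hpp>  // assumed header for the chunked reader options/class
    #include <cstddef>
    #include <string>

    void read_in_chunks(std::string const& path, std::size_t byte_limit)
    {
      // build the chunked reader options with a per-chunk output size limit
      auto const opts =
        cudf::io::chunked_parquet_reader_options::builder(cudf::io::source_info(path))
          .byte_limit(byte_limit)
          .build();

      cudf::io::chunked_parquet_reader reader(opts);
      while (reader.has_next()) {
        auto chunk = reader.read_chunk();  // chunk.tbl owns the next piece of the result table
        // ... consume *chunk.tbl ...
      }
    }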
mark down From 2aef5cc64732ba3d36a06dc8fd384cdaf88351ed Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 20 Oct 2022 15:19:31 -0700 Subject: [PATCH 077/162] Fix off-by-one bug Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_preprocess.cu | 50 ++++++++++++++----------- 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/cpp/src/io/parquet/reader_preprocess.cu b/cpp/src/io/parquet/reader_preprocess.cu index 98aa029b8ce..c4b8cd5e926 100644 --- a/cpp/src/io/parquet/reader_preprocess.cu +++ b/cpp/src/io/parquet/reader_preprocess.cu @@ -36,24 +36,32 @@ using namespace cudf::io; void print_pages(hostdevice_vector& pages, rmm::cuda_stream_view _stream) { pages.device_to_host(_stream, true); - for(size_t idx=0; idx& chunks, rmm::cuda_stream_view _stream) { chunks.device_to_host(_stream, true); - for(size_t idx=0; idx compute_splits(hostdevice_vector& c chunks, 0, INT_MAX, - true, // compute num_rows - chunked_read_size > 0, // compute string sizes - _stream); + true, // compute num_rows + chunked_read_size > 0, // compute string sizes + _stream); // computes: // PageInfo::chunk_row for all pages @@ -558,7 +566,7 @@ void reader::impl::preprocess_columns(hostdevice_vector& c // retrieve pages back pages.device_to_host(_stream, true); - //print_pages(pages, _stream); + // print_pages(pages, _stream); } // compute splits if necessary. @@ -581,15 +589,15 @@ void reader::impl::allocate_columns(hostdevice_vector& chu // It is only necessary to do this second pass if uses_custom_row_bounds is set (if the user has // specified artifical bounds). if (uses_custom_row_bounds) { - gpu::ComputePageSizes(pages, - chunks, - min_row, - num_rows, - false, // num_rows is already computed - false, // no need to compute string sizes + gpu::ComputePageSizes(pages, + chunks, + min_row, + num_rows, + false, // num_rows is already computed + false, // no need to compute string sizes _stream); - //print_pages(pages, _stream); - } + // print_pages(pages, _stream); + } // iterate over all input columns and allocate any associated output // buffers if they are not part of a list hierarchy. mark down From 5245b9b7bbdfa6157d01288677488e6a70d6fdf4 Mon Sep 17 00:00:00 2001 From: Dave Baranec Date: Thu, 20 Oct 2022 18:53:17 -0500 Subject: [PATCH 078/162] Fix an issue related to aliased output pointers in the chunked read case. We were checking for nullptr to indicate whether or not we were in the decode step, but that wasn't valid for the chunked read case because the pointers temporarily become stale. --- cpp/src/io/parquet/page_data.cu | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index 9b69aea78b0..04e8ac678f5 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -848,7 +848,8 @@ static __device__ bool setupLocalPageInfo(page_state_s* const s, PageInfo const* p, device_span chunks, size_t min_row, - size_t num_rows) + size_t num_rows, + bool decode_step) { int t = threadIdx.x; int chunk_idx; @@ -965,7 +966,11 @@ static __device__ bool setupLocalPageInfo(page_state_s* const s, // is responsible for. 
// - for flat schemas, we can do this directly by using row counts // - for nested schemas, these offsets are computed during the preprocess step - if (s->col.column_data_base != nullptr) { + // + // NOTE: in a chunked read situation, s->col.column_data_base and s->col.valid_map_base + // will be aliased to memory that has been freed when we get here in the non-decode step, so + // we cannot check against nullptr. we'll just check a flag directly. + if (decode_step) { int max_depth = s->col.max_nesting_depth; for (int idx = 0; idx < max_depth; idx++) { PageNestingInfo* pni = &s->page.nesting[idx]; @@ -981,6 +986,7 @@ static __device__ bool setupLocalPageInfo(page_state_s* const s, } pni->data_out = static_cast(s->col.column_data_base[idx]); + if (pni->data_out != nullptr) { // anything below max depth with a valid data pointer must be a list, so the // element size is the size of the offset type. @@ -1086,7 +1092,7 @@ static __device__ bool setupLocalPageInfo(page_state_s* const s, // if we're in the decoding step, jump directly to the first // value we care about - if (s->col.column_data_base != nullptr) { + if (decode_step) { s->input_value_count = s->page.skipped_values > -1 ? s->page.skipped_values : 0; } else { s->input_value_count = 0; @@ -1541,7 +1547,7 @@ __global__ void __launch_bounds__(block_size) int t = threadIdx.x; PageInfo* pp = &pages[page_idx]; - if (!setupLocalPageInfo(s, pp, chunks, min_row, num_rows)) { + if (!setupLocalPageInfo(s, pp, chunks, min_row, num_rows, false)) { return; } @@ -1561,7 +1567,7 @@ __global__ void __launch_bounds__(block_size) } } return; - } + } // zero sizes int d = 0; @@ -1662,7 +1668,7 @@ __global__ void __launch_bounds__(block_size) gpuDecodePageData( int t = threadIdx.x; int out_thread0; - if (!setupLocalPageInfo(s, &pages[page_idx], chunks, min_row, num_rows)) { return; } + if (!setupLocalPageInfo(s, &pages[page_idx], chunks, min_row, num_rows, true)) { return; } if (s->dict_base) { out_thread0 = (s->dict_bits > 0) ? 
64 : 32; From 1be7f919cb1e6dcfb890934a6c3008cb624911be Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 20 Oct 2022 19:42:04 -0700 Subject: [PATCH 079/162] Do not keep reference---copy object instead --- cpp/src/io/parquet/reader_impl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index 3185d6e9554..8c1bb9568a4 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -283,7 +283,7 @@ class reader::impl { bool _file_preprocessed{false}; // TODO: Remove below - parquet_reader_options const& _options; + parquet_reader_options const _options; }; } // namespace parquet From 62283c74ea249a62f5d2afa1fafd3cc3778ae920 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 20 Oct 2022 19:42:04 -0700 Subject: [PATCH 080/162] Do not keep reference---copy object instead --- cpp/src/io/parquet/reader_impl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index 3185d6e9554..8c1bb9568a4 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -283,7 +283,7 @@ class reader::impl { bool _file_preprocessed{false}; // TODO: Remove below - parquet_reader_options const& _options; + parquet_reader_options const _options; }; } // namespace parquet From 44424a2f1e67e441c3df1061b57ffdb6159c2fab Mon Sep 17 00:00:00 2001 From: Dave Baranec Date: Mon, 24 Oct 2022 09:39:32 -0500 Subject: [PATCH 081/162] Optimization: don't do any decoding or page size computation for pages that have been trimmed. --- cpp/src/io/parquet/page_data.cu | 21 +++-- cpp/src/io/parquet/reader_preprocess.cu | 106 +++--------------------- 2 files changed, 28 insertions(+), 99 deletions(-) diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index 04e8ac678f5..0d7562865c2 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -980,7 +980,7 @@ static __device__ bool setupLocalPageInfo(page_state_s* const s, if (s->col.max_level[level_type::REPETITION] == 0) { output_offset = page_start_row >= min_row ? page_start_row - min_row : 0; } - // for schemas with lists, we've already got the exactly value precomputed + // for schemas with lists, we've already got the exact value precomputed else { output_offset = pni->page_start_value; } @@ -1556,11 +1556,13 @@ __global__ void __launch_bounds__(block_size) bool const has_repetition = chunks[pp->chunk_idx].max_level[level_type::REPETITION] > 0; compute_string_sizes = compute_string_sizes && ((s->col.data_type & 7) == BYTE_ARRAY && s->dtype_len != 4); - // if this is a flat hierarchy (no lists) and is not a string column, compute the size directly - // from the number of values. - if (!has_repetition && !compute_string_sizes) { + // reasons we might want to early out: + // - if this is a flat hierarchy (no lists) and is not a string column. in this case we don't need to do + // the expensive work of traversing the level data to determine sizes. we can just compute it directly. + // - if this is the trim pass and we have no rows to output for this page. + if (!has_repetition && !compute_string_sizes) { if (!t) { - // note: doing this for all nesting level because we can still have structs even if we don't + // note: doing this for all nesting levels because we can still have structs even if we don't // have lists. 
for (size_type idx = 0; idx < pp->num_nesting_levels; idx++) { pp->nesting[idx].size = pp->num_input_values; @@ -1589,6 +1591,10 @@ __global__ void __launch_bounds__(block_size) s->row_index_lower_bound = -1; } } + // if we have no work to do for this page. + if(!compute_num_rows_pass && s->num_rows == 0){ + return; + } __syncthreads(); // optimization : it might be useful to have a version of gpuDecodeStream that could go wider than @@ -1670,6 +1676,11 @@ __global__ void __launch_bounds__(block_size) gpuDecodePageData( if (!setupLocalPageInfo(s, &pages[page_idx], chunks, min_row, num_rows, true)) { return; } + // if we have no rows to do (eg, in a skip_rows/num_rows case) + if(s->num_rows == 0){ + return; + } + if (s->dict_base) { out_thread0 = (s->dict_bits > 0) ? 64 : 32; } else { diff --git a/cpp/src/io/parquet/reader_preprocess.cu b/cpp/src/io/parquet/reader_preprocess.cu index c4b8cd5e926..9a6a2388da8 100644 --- a/cpp/src/io/parquet/reader_preprocess.cu +++ b/cpp/src/io/parquet/reader_preprocess.cu @@ -33,6 +33,7 @@ namespace cudf::io::detail::parquet { using namespace cudf::io::parquet; using namespace cudf::io; +#if defined(PREPROCESS_DEBUG) void print_pages(hostdevice_vector& pages, rmm::cuda_stream_view _stream) { pages.device_to_host(_stream, true); @@ -64,6 +65,7 @@ void print_chunks(hostdevice_vector& chunks, rmm::cuda_str c.num_rows); } } +#endif // PREPROCESS_DEBUG namespace { @@ -490,32 +492,24 @@ void reader::impl::preprocess_columns(hostdevice_vector& c // intermediate data we will need for further chunked reads if (has_lists || chunked_read_size > 0) { // computes: - // PageNestingInfo::size for each level of nesting, for each page. - // This computes the size for the entire page, not taking row bounds into account. - /* - gpuComputePageSizes<<>>( - pages.device_ptr(), - chunks, - // if uses_custom_row_bounds is false, include all possible rows. - uses_custom_row_bounds ? min_row : 0, - uses_custom_row_bounds ? num_rows : INT_MAX, - !uses_custom_row_bounds); - */ - // we will be applying a later trim pass if skip_rows/num_rows is being used, which can happen + // PageNestingInfo::num_rows for each page. the true number of rows (taking repetition into account), not + // just the number of values. + // PageNestingInfo::size for each level of nesting, for each page. + // + // we will be applying a later "trim" pass if skip_rows/num_rows is being used, which can happen // if: // - user has passed custom row bounds // - if we will be doing a chunked read - auto const will_trim_later = uses_custom_row_bounds || chunked_read_size > 0; gpu::ComputePageSizes(pages, chunks, - 0, - INT_MAX, + 0, // 0-max size_t. process all possible rows + std::numeric_limits::max(), true, // compute num_rows chunked_read_size > 0, // compute string sizes _stream); // computes: - // PageInfo::chunk_row for all pages + // PageInfo::chunk_row (the absolute start row index) for all pages // Note: this is doing some redundant work for pages in flat hierarchies. chunk_row has already // been computed during header decoding. the overall amount of work here is very small though. auto key_input = thrust::make_transform_iterator(pages.device_ptr(), get_page_chunk_idx{}); @@ -585,7 +579,8 @@ void reader::impl::allocate_columns(hostdevice_vector& chu { // computes: // PageNestingInfo::size for each level of nesting, for each page, taking row bounds into account. - // PageInfo::skipped_values, which tells us where to start decoding in the input. 
+ // PageInfo::skipped_values, which tells us where to start decoding in the input to respect the + // user bounds. // It is only necessary to do this second pass if uses_custom_row_bounds is set (if the user has // specified artifical bounds). if (uses_custom_row_bounds) { @@ -682,80 +677,3 @@ void reader::impl::allocate_columns(hostdevice_vector& chu } } // namespace cudf::io::detail::parquet - -/* -{ - std::mt19937 gen(6542); - std::bernoulli_distribution bn(0.7f); - //auto valids = -// cudf::detail::make_counting_transform_iterator(0, [&](int index) { return bn(gen); }); - auto values = thrust::make_counting_iterator(0); - - constexpr size_type num_rows = 40000; - cudf::test::fixed_width_column_wrapper a(values, values + num_rows); - cudf::test::fixed_width_column_wrapper b(values, values + num_rows); - - cudf::table_view t({a, b}); - cudf::io::parquet_writer_options opts = -cudf::io::parquet_writer_options::builder(cudf::io::sink_info{"parquet/tmp/chunked_splits.parquet"}, -t); cudf::io::write_parquet(opts); - - cudf::io::parquet_reader_options in_opts = -cudf::io::parquet_reader_options::builder(cudf::io::source_info{"parquet/tmp/chunked_splits.parquet"}); - auto result = cudf::io::read_parquet(in_opts); -} -*/ - -/* -{ - // values the cudf parquet writer uses - // constexpr size_t default_max_page_size_bytes = 512 * 1024; ///< 512KB per page - // constexpr size_type default_max_page_size_rows = 20000; ///< 20k rows per page - - std::mt19937 gen(6542); - std::bernoulli_distribution bn(0.7f); - auto values = thrust::make_counting_iterator(0); - - constexpr size_type num_rows = 60000; - - // ints Page total bytes cumulative bytes - // 20000 rows of 4 bytes each = A0 80000 80000 - // 20000 rows of 4 bytes each = A1 80000 160000 - // 20000 rows of 4 bytes each = A2 80000 240000 - cudf::test::fixed_width_column_wrapper a(values, values + num_rows); - - // strings Page total bytes cumulative bytes - // 20000 rows of 1 char each (20000 + 80004) = B0 100004 100004 - // 20000 rows of 4 chars each (80000 + 80004) = B1 160004 260008 - // 20000 rows of 16 chars each (320000 + 80004) = B2 400004 660012 - std::vector strings { "a", "bbbb", "cccccccccccccccc" }; - auto const str_iter = cudf::detail::make_counting_transform_iterator(0, [&](int i){ - if(i < 20000){ - return strings[0]; - } - if(i < 40000){ - return strings[1]; - } - return strings[2]; - }); - cudf::test::strings_column_wrapper b{str_iter, str_iter + num_rows}; - - // cumulative sizes - // A0 + B0 : 180004 - // A1 + B1 : 420008 - // A2 + B2 : 900012 - // skip_rows / num_rows - // chunked_read_size of 500000 should give 2 chunks: {0, 40000}, {40000, 20000} - // chunked_read_size of 1000000 should give 1 chunks: {0, 60000}, - - auto write_tbl = table_view{{a, b}}; - auto filepath = std::string{"parquet/tmp/chunked_splits_strings.parquet"}; - cudf::io::parquet_writer_options out_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, write_tbl); - cudf::io::write_parquet(out_opts); - - cudf::io::parquet_reader_options in_opts = - cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}); - auto result = cudf::io::read_parquet(in_opts); - } - */ From 445db9b4bac04a34ea03d7e0cae65fa1eb51ff78 Mon Sep 17 00:00:00 2001 From: Dave Baranec Date: Mon, 24 Oct 2022 10:44:18 -0500 Subject: [PATCH 082/162] Fix build issue for spark-rapids-jni --- cpp/src/io/functions.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp index 
6aa53ead955..c548687e937 100644 --- a/cpp/src/io/functions.cpp +++ b/cpp/src/io/functions.cpp @@ -478,7 +478,7 @@ std::unique_ptr> write_parquet(parquet_writer_options const chunked_parquet_reader::chunked_parquet_reader(chunked_parquet_reader_options const& options, rmm::mr::device_memory_resource* mr) : reader{std::make_unique( - make_datasources(options.get_source()), options, cudf::default_stream_value, mr)} + make_datasources(options.get_source()), options, cudf::detail::default_stream_value, mr)} { } From b50b563af05a7d7085af83cddca3c377a8796e60 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 24 Oct 2022 09:48:21 -0700 Subject: [PATCH 083/162] Cleanup: Remove `chunked_parquet_reader_options` and `chunked_parquet_reader_options_builder` Signed-off-by: Nghia Truong --- cpp/include/cudf/io/detail/parquet.hpp | 5 +- cpp/include/cudf/io/parquet.hpp | 151 ++----------------- cpp/src/io/functions.cpp | 29 +--- cpp/src/io/parquet/reader_impl.cu | 15 +- cpp/src/io/parquet/reader_impl.hpp | 7 + cpp/tests/io/parquet_chunked_reader_test.cpp | 6 +- 6 files changed, 41 insertions(+), 172 deletions(-) diff --git a/cpp/include/cudf/io/detail/parquet.hpp b/cpp/include/cudf/io/detail/parquet.hpp index 3bba9ddc841..eac66ef5938 100644 --- a/cpp/include/cudf/io/detail/parquet.hpp +++ b/cpp/include/cudf/io/detail/parquet.hpp @@ -96,8 +96,9 @@ class chunked_reader : reader { * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource to use for device memory allocation */ - explicit chunked_reader(std::vector>&& sources, - chunked_parquet_reader_options const& options, + explicit chunked_reader(std::size_t chunk_read_limit, + std::vector>&& sources, + parquet_reader_options const& options, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index 7caa1d65d1f..66f4fc5b27f 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -407,119 +407,6 @@ table_with_metadata read_parquet( parquet_reader_options const& options, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); -// chunked reader stuff -class chunked_parquet_reader_options_builder; - -/** - * @brief Settings for `chunked_parquet_reader`. - */ -class chunked_parquet_reader_options : public parquet_reader_options { - // Limit the number of maximum bytes that chunked_parquet_reader will read each time. - std::size_t _byte_limit; - - friend class chunked_parquet_reader_options_builder; - - public: - /** - * @brief Default constructor. - * - * This has been added since Cython requires a default constructor to create objects on stack. - */ - explicit chunked_parquet_reader_options() = default; - - /** - * @brief Creates a parquet_reader_options_builder which will build parquet_reader_options. - * - * @param src Source information to read parquet file - * @return Builder to build reader options - */ - static chunked_parquet_reader_options_builder builder(source_info const& src); - - /** - * @brief Return the maximum number of bytes that will be read by - * `chunked_parquet_reader::read()`. - * - * @return Number of maximum bytes to read each time - */ - [[nodiscard]] size_type get_byte_limit() const { return _byte_limit; } - - /** - * @brief Sets the maximum number of bytes that will be read by - * `chunked_parquet_reader::read()`. 
- * - * @param byte_limit Number of maximum bytes to read each time - */ - void set_byte_limit(std::size_t byte_limit) { _byte_limit = byte_limit; } -}; - -/** - * @brief Builds a `chunked_parquet_reader_options` instance to use with `chunked_parquet_reader` - * class. - */ -class chunked_parquet_reader_options_builder : public parquet_reader_options_builder { - chunked_parquet_reader_options chunked_reader_options{}; - bool options_built{false}; - - /** - * @brief Create a `chunked_parquet_reader_options` object. - * - * The returned object is a result of taking over the ownership of the internal states. - * Therefore, this should be called at most once to avoid data corruption. - */ - chunked_parquet_reader_options create_options() - { - CUDF_EXPECTS(!options_built, "This function should not be called more than once"); - options_built = true; - - dynamic_cast(chunked_reader_options) = std::move(options); - return std::move(chunked_reader_options); - } - - public: - /** - * @brief Default constructor. - * - * This has been added since Cython requires a default constructor to create objects on stack. - */ - chunked_parquet_reader_options_builder() = default; - - /** - * @brief Constructor from source info. - * - * @param src The source information used to read parquet file - */ - explicit chunked_parquet_reader_options_builder(source_info const& src) - : parquet_reader_options_builder(src) - { - } - - /** - * @brief Sets number of byte limit to read each time. - * - * @param limit Number of maximum bytes to read per `read_next()` call - * @return this for chaining - */ - chunked_parquet_reader_options_builder& byte_limit(std::size_t limit) - { - chunked_reader_options.set_byte_limit(limit); - return *this; - } - - /** - * @brief Return `chunked_parquet_reader_options` instance once this's built. - */ - operator chunked_parquet_reader_options() { return create_options(); } - - /** - * @brief Return `chunked_parquet_reader_options` instance once this's built. - * - * This has been added since Cython does not support overloading of conversion operators. - * - * @return Built `chunked_parquet_reader_options` object's r-value reference - */ - chunked_parquet_reader_options build() { return create_options(); } -}; - /** * @brief The chunked parquet reader class to handle options and read tables in chunks. * @@ -531,18 +418,25 @@ class chunked_parquet_reader { public: /** * @brief Default constructor, this should never be used. - * This is added just to satisfy cython. + * + * This is added just to satisfy cython. */ chunked_parquet_reader() = default; /** - * @brief Constructor with chunked reader options. + * @brief Constructor for chunked reader. * - * @param options The options used to read file + * This constructor accepts the same `parquet_reader_option` parameter as in `read_parquet()`, but + * with an additional parameter to specify the size byte limit of the output table for each + * reading. + * + * @param chunk_read_limit The limit (in bytes) to read each time + * @param options The options used to read Parquet file * @param mr Device memory resource to use for device memory allocation */ chunked_parquet_reader( - chunked_parquet_reader_options const& options, + std::size_t chunk_read_limit, + parquet_reader_options const& options, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -553,37 +447,24 @@ class chunked_parquet_reader { /** * @brief Check if there is any data of the given file has not yet processed. 
* - * If the file has been closed (i.e., the `close()` function has been called), this will always - * return `false`. - * - * @return A boolean value indicating if there is any data left to process + * @return A boolean value indicating if there is any data left to read */ bool has_next(); /** * @brief Read a chunk of Parquet dataset into a set of columns. * - * The sequence of returned tables, if concatenated by their order, guarantee to form a complete + * The sequence of returned tables, if concatenated by their order, guarantees to form a complete * dataset as reading the entire given file at once. * - * An empty table will be returned if all the data in the given file has been read and returned by - * the previous calls, or the `close()` function has been called. + * An empty table will be returned if the file is empty, or all the data in the given file has + * been read and returned by the previous calls. * - * @return The set of columns along with metadata + * @return The output `cudf::table` along with its metadata */ table_with_metadata read_chunk(); private: - /** - * @brief Perform all necessary preprocessing work for reading the given file. - * - * The preprocessing is performed for the entire file, not just by chunks, which may include: - * - Parsing the schema. - * - Decompressing and processing pages. - * - Any other necessary preprocessing steps. - */ - // void preprocess(); - std::unique_ptr reader; }; diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp index c548687e937..9454dd90681 100644 --- a/cpp/src/io/functions.cpp +++ b/cpp/src/io/functions.cpp @@ -86,13 +86,6 @@ parquet_reader_options_builder parquet_reader_options::builder(source_info const return parquet_reader_options_builder{src}; } -// Returns builder for parquet_reader_options -chunked_parquet_reader_options_builder chunked_parquet_reader_options::builder( - source_info const& src) -{ - return chunked_parquet_reader_options_builder{src}; -} - // Returns builder for parquet_writer_options parquet_writer_options_builder parquet_writer_options::builder(sink_info const& sink, table_view const& table) @@ -475,10 +468,14 @@ std::unique_ptr> write_parquet(parquet_writer_options const /** * @copydoc cudf::io::chunked_parquet_reader::chunked_parquet_reader */ -chunked_parquet_reader::chunked_parquet_reader(chunked_parquet_reader_options const& options, +chunked_parquet_reader::chunked_parquet_reader(std::size_t chunk_read_limit, + parquet_reader_options const& options, rmm::mr::device_memory_resource* mr) - : reader{std::make_unique( - make_datasources(options.get_source()), options, cudf::detail::default_stream_value, mr)} + : reader{std::make_unique(chunk_read_limit, + make_datasources(options.get_source()), + options, + cudf::get_default_stream(), + mr)} { } @@ -495,17 +492,7 @@ bool chunked_parquet_reader::has_next() { return reader->has_next(); } /** * @copydoc cudf::io::chunked_parquet_reader::read_chunk */ -table_with_metadata chunked_parquet_reader::read_chunk() -{ - // On the first call, a preprocessing step is called which may be expensive before a table is - // returned. All subsequent calls are essentially just doing incremental column allocation and row - // decoding (using all the data stored from the preprocessing step). 
- - // In each call to this function, the internal `skip_rows` state is updated such that the next - // call will skip the rows returned by the previous call, making sure that the sequence of - // returned tables are continuous and form a complete dataset as reading the entire file at once. - return reader->read_chunk(); -} +table_with_metadata chunked_parquet_reader::read_chunk() { return reader->read_chunk(); } /** * @copydoc cudf::io::parquet_chunked_writer::parquet_chunked_writer diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 24888073701..c8b31505751 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1571,13 +1571,6 @@ reader::impl::impl(std::vector>&& sources, options.is_enabled_use_pandas_metadata(), _strings_to_categorical, _timestamp_type.id()); - - // If the options passed in is an instance of `chunked_parquet_reader_options`, extract the - // `byte_limit` parameter. - if (auto const chunked_options = dynamic_cast(&options); - chunked_options) { - _chunk_read_limit = chunked_options->get_byte_limit(); - } } void reader::impl::preprocess_file_and_columns( @@ -1895,15 +1888,17 @@ table_with_metadata reader::read(parquet_reader_options const& options) } // Forward to implementation -chunked_reader::chunked_reader(std::vector>&& sources, - chunked_parquet_reader_options const& options, +chunked_reader::chunked_reader(std::size_t chunk_read_limit, + std::vector>&& sources, + parquet_reader_options const& options, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) : reader(std::forward>>(sources), - dynamic_cast(options), + options, stream, mr) { + _impl->set_chunk_read_limit(chunk_read_limit); } // Destructor within this translation unit diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index 8c1bb9568a4..2d54b8984e8 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -82,6 +82,13 @@ class reader::impl { bool uses_custom_row_bounds, std::vector> const& row_group_indices); + /** + * @brief set_chunk_read_limit + * // TODO + * @param chunk_read_limit + */ + void set_chunk_read_limit(std::size_t chunk_read_limit) { _chunk_read_limit = chunk_read_limit; } + /** * TODO * diff --git a/cpp/tests/io/parquet_chunked_reader_test.cpp b/cpp/tests/io/parquet_chunked_reader_test.cpp index 3393a6139b5..9f61e43f076 100644 --- a/cpp/tests/io/parquet_chunked_reader_test.cpp +++ b/cpp/tests/io/parquet_chunked_reader_test.cpp @@ -57,10 +57,8 @@ using strings_col = cudf::test::strings_column_wrapper; auto chunked_read(std::string const& filepath, std::size_t byte_limit) { auto const read_opts = - cudf::io::chunked_parquet_reader_options::builder(cudf::io::source_info{filepath}) - .byte_limit(byte_limit) - .build(); - auto reader = cudf::io::chunked_parquet_reader(read_opts); + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}).build(); + auto reader = cudf::io::chunked_parquet_reader(byte_limit, read_opts); auto num_chunks = 0; auto result = std::make_unique(); From db56908ffbf109e4fb8678f9aad18b1b35ce7121 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 24 Oct 2022 09:48:21 -0700 Subject: [PATCH 084/162] Cleanup: Remove `chunked_parquet_reader_options` and `chunked_parquet_reader_options_builder` Signed-off-by: Nghia Truong --- cpp/include/cudf/io/detail/parquet.hpp | 6 +- cpp/include/cudf/io/parquet.hpp | 151 ++----------------- cpp/src/io/functions.cpp | 29 +--- cpp/src/io/parquet/reader_impl.cu | 15 +- 
cpp/src/io/parquet/reader_impl.hpp | 7 + cpp/tests/io/parquet_chunked_reader_test.cpp | 6 +- 6 files changed, 41 insertions(+), 173 deletions(-) diff --git a/cpp/include/cudf/io/detail/parquet.hpp b/cpp/include/cudf/io/detail/parquet.hpp index 3bba9ddc841..5c5358d35a3 100644 --- a/cpp/include/cudf/io/detail/parquet.hpp +++ b/cpp/include/cudf/io/detail/parquet.hpp @@ -36,7 +36,6 @@ namespace io { // Forward declaration class parquet_reader_options; class parquet_writer_options; -class chunked_parquet_reader_options; class chunked_parquet_writer_options; namespace detail { @@ -96,8 +95,9 @@ class chunked_reader : reader { * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource to use for device memory allocation */ - explicit chunked_reader(std::vector>&& sources, - chunked_parquet_reader_options const& options, + explicit chunked_reader(std::size_t chunk_read_limit, + std::vector>&& sources, + parquet_reader_options const& options, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index 7caa1d65d1f..66f4fc5b27f 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -407,119 +407,6 @@ table_with_metadata read_parquet( parquet_reader_options const& options, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); -// chunked reader stuff -class chunked_parquet_reader_options_builder; - -/** - * @brief Settings for `chunked_parquet_reader`. - */ -class chunked_parquet_reader_options : public parquet_reader_options { - // Limit the number of maximum bytes that chunked_parquet_reader will read each time. - std::size_t _byte_limit; - - friend class chunked_parquet_reader_options_builder; - - public: - /** - * @brief Default constructor. - * - * This has been added since Cython requires a default constructor to create objects on stack. - */ - explicit chunked_parquet_reader_options() = default; - - /** - * @brief Creates a parquet_reader_options_builder which will build parquet_reader_options. - * - * @param src Source information to read parquet file - * @return Builder to build reader options - */ - static chunked_parquet_reader_options_builder builder(source_info const& src); - - /** - * @brief Return the maximum number of bytes that will be read by - * `chunked_parquet_reader::read()`. - * - * @return Number of maximum bytes to read each time - */ - [[nodiscard]] size_type get_byte_limit() const { return _byte_limit; } - - /** - * @brief Sets the maximum number of bytes that will be read by - * `chunked_parquet_reader::read()`. - * - * @param byte_limit Number of maximum bytes to read each time - */ - void set_byte_limit(std::size_t byte_limit) { _byte_limit = byte_limit; } -}; - -/** - * @brief Builds a `chunked_parquet_reader_options` instance to use with `chunked_parquet_reader` - * class. - */ -class chunked_parquet_reader_options_builder : public parquet_reader_options_builder { - chunked_parquet_reader_options chunked_reader_options{}; - bool options_built{false}; - - /** - * @brief Create a `chunked_parquet_reader_options` object. - * - * The returned object is a result of taking over the ownership of the internal states. - * Therefore, this should be called at most once to avoid data corruption. 
- */ - chunked_parquet_reader_options create_options() - { - CUDF_EXPECTS(!options_built, "This function should not be called more than once"); - options_built = true; - - dynamic_cast(chunked_reader_options) = std::move(options); - return std::move(chunked_reader_options); - } - - public: - /** - * @brief Default constructor. - * - * This has been added since Cython requires a default constructor to create objects on stack. - */ - chunked_parquet_reader_options_builder() = default; - - /** - * @brief Constructor from source info. - * - * @param src The source information used to read parquet file - */ - explicit chunked_parquet_reader_options_builder(source_info const& src) - : parquet_reader_options_builder(src) - { - } - - /** - * @brief Sets number of byte limit to read each time. - * - * @param limit Number of maximum bytes to read per `read_next()` call - * @return this for chaining - */ - chunked_parquet_reader_options_builder& byte_limit(std::size_t limit) - { - chunked_reader_options.set_byte_limit(limit); - return *this; - } - - /** - * @brief Return `chunked_parquet_reader_options` instance once this's built. - */ - operator chunked_parquet_reader_options() { return create_options(); } - - /** - * @brief Return `chunked_parquet_reader_options` instance once this's built. - * - * This has been added since Cython does not support overloading of conversion operators. - * - * @return Built `chunked_parquet_reader_options` object's r-value reference - */ - chunked_parquet_reader_options build() { return create_options(); } -}; - /** * @brief The chunked parquet reader class to handle options and read tables in chunks. * @@ -531,18 +418,25 @@ class chunked_parquet_reader { public: /** * @brief Default constructor, this should never be used. - * This is added just to satisfy cython. + * + * This is added just to satisfy cython. */ chunked_parquet_reader() = default; /** - * @brief Constructor with chunked reader options. + * @brief Constructor for chunked reader. * - * @param options The options used to read file + * This constructor accepts the same `parquet_reader_option` parameter as in `read_parquet()`, but + * with an additional parameter to specify the size byte limit of the output table for each + * reading. + * + * @param chunk_read_limit The limit (in bytes) to read each time + * @param options The options used to read Parquet file * @param mr Device memory resource to use for device memory allocation */ chunked_parquet_reader( - chunked_parquet_reader_options const& options, + std::size_t chunk_read_limit, + parquet_reader_options const& options, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -553,37 +447,24 @@ class chunked_parquet_reader { /** * @brief Check if there is any data of the given file has not yet processed. * - * If the file has been closed (i.e., the `close()` function has been called), this will always - * return `false`. - * - * @return A boolean value indicating if there is any data left to process + * @return A boolean value indicating if there is any data left to read */ bool has_next(); /** * @brief Read a chunk of Parquet dataset into a set of columns. * - * The sequence of returned tables, if concatenated by their order, guarantee to form a complete + * The sequence of returned tables, if concatenated by their order, guarantees to form a complete * dataset as reading the entire given file at once. 
* - * An empty table will be returned if all the data in the given file has been read and returned by - * the previous calls, or the `close()` function has been called. + * An empty table will be returned if the file is empty, or all the data in the given file has + * been read and returned by the previous calls. * - * @return The set of columns along with metadata + * @return The output `cudf::table` along with its metadata */ table_with_metadata read_chunk(); private: - /** - * @brief Perform all necessary preprocessing work for reading the given file. - * - * The preprocessing is performed for the entire file, not just by chunks, which may include: - * - Parsing the schema. - * - Decompressing and processing pages. - * - Any other necessary preprocessing steps. - */ - // void preprocess(); - std::unique_ptr reader; }; diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp index c548687e937..9454dd90681 100644 --- a/cpp/src/io/functions.cpp +++ b/cpp/src/io/functions.cpp @@ -86,13 +86,6 @@ parquet_reader_options_builder parquet_reader_options::builder(source_info const return parquet_reader_options_builder{src}; } -// Returns builder for parquet_reader_options -chunked_parquet_reader_options_builder chunked_parquet_reader_options::builder( - source_info const& src) -{ - return chunked_parquet_reader_options_builder{src}; -} - // Returns builder for parquet_writer_options parquet_writer_options_builder parquet_writer_options::builder(sink_info const& sink, table_view const& table) @@ -475,10 +468,14 @@ std::unique_ptr> write_parquet(parquet_writer_options const /** * @copydoc cudf::io::chunked_parquet_reader::chunked_parquet_reader */ -chunked_parquet_reader::chunked_parquet_reader(chunked_parquet_reader_options const& options, +chunked_parquet_reader::chunked_parquet_reader(std::size_t chunk_read_limit, + parquet_reader_options const& options, rmm::mr::device_memory_resource* mr) - : reader{std::make_unique( - make_datasources(options.get_source()), options, cudf::detail::default_stream_value, mr)} + : reader{std::make_unique(chunk_read_limit, + make_datasources(options.get_source()), + options, + cudf::get_default_stream(), + mr)} { } @@ -495,17 +492,7 @@ bool chunked_parquet_reader::has_next() { return reader->has_next(); } /** * @copydoc cudf::io::chunked_parquet_reader::read_chunk */ -table_with_metadata chunked_parquet_reader::read_chunk() -{ - // On the first call, a preprocessing step is called which may be expensive before a table is - // returned. All subsequent calls are essentially just doing incremental column allocation and row - // decoding (using all the data stored from the preprocessing step). - - // In each call to this function, the internal `skip_rows` state is updated such that the next - // call will skip the rows returned by the previous call, making sure that the sequence of - // returned tables are continuous and form a complete dataset as reading the entire file at once. 
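For reference, the caller-facing pattern for the chunked reader API above is a simple has_next()/read_chunk() loop, as exercised by the chunked_read() helper in parquet_chunked_reader_test.cpp. A minimal sketch, assuming an input file named example.parquet and an illustrative 500KB per-chunk output limit:

    #include <cudf/io/parquet.hpp>

    void read_in_chunks()
    {
      auto const opts =
        cudf::io::parquet_reader_options::builder(cudf::io::source_info{"example.parquet"}).build();
      // first argument is the per-chunk output size limit in bytes (value is illustrative)
      auto reader = cudf::io::chunked_parquet_reader(500'000, opts);

      while (reader.has_next()) {
        // each call returns the next contiguous slice of rows; concatenating the chunks in
        // order yields the same dataset as a single read_parquet() call
        auto chunk = reader.read_chunk();
        // ... use chunk.tbl and chunk.metadata ...
      }
    }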
- return reader->read_chunk(); -} +table_with_metadata chunked_parquet_reader::read_chunk() { return reader->read_chunk(); } /** * @copydoc cudf::io::parquet_chunked_writer::parquet_chunked_writer diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 24888073701..c8b31505751 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1571,13 +1571,6 @@ reader::impl::impl(std::vector>&& sources, options.is_enabled_use_pandas_metadata(), _strings_to_categorical, _timestamp_type.id()); - - // If the options passed in is an instance of `chunked_parquet_reader_options`, extract the - // `byte_limit` parameter. - if (auto const chunked_options = dynamic_cast(&options); - chunked_options) { - _chunk_read_limit = chunked_options->get_byte_limit(); - } } void reader::impl::preprocess_file_and_columns( @@ -1895,15 +1888,17 @@ table_with_metadata reader::read(parquet_reader_options const& options) } // Forward to implementation -chunked_reader::chunked_reader(std::vector>&& sources, - chunked_parquet_reader_options const& options, +chunked_reader::chunked_reader(std::size_t chunk_read_limit, + std::vector>&& sources, + parquet_reader_options const& options, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) : reader(std::forward>>(sources), - dynamic_cast(options), + options, stream, mr) { + _impl->set_chunk_read_limit(chunk_read_limit); } // Destructor within this translation unit diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index 8c1bb9568a4..2d54b8984e8 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -82,6 +82,13 @@ class reader::impl { bool uses_custom_row_bounds, std::vector> const& row_group_indices); + /** + * @brief set_chunk_read_limit + * // TODO + * @param chunk_read_limit + */ + void set_chunk_read_limit(std::size_t chunk_read_limit) { _chunk_read_limit = chunk_read_limit; } + /** * TODO * diff --git a/cpp/tests/io/parquet_chunked_reader_test.cpp b/cpp/tests/io/parquet_chunked_reader_test.cpp index 3393a6139b5..9f61e43f076 100644 --- a/cpp/tests/io/parquet_chunked_reader_test.cpp +++ b/cpp/tests/io/parquet_chunked_reader_test.cpp @@ -57,10 +57,8 @@ using strings_col = cudf::test::strings_column_wrapper; auto chunked_read(std::string const& filepath, std::size_t byte_limit) { auto const read_opts = - cudf::io::chunked_parquet_reader_options::builder(cudf::io::source_info{filepath}) - .byte_limit(byte_limit) - .build(); - auto reader = cudf::io::chunked_parquet_reader(read_opts); + cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}).build(); + auto reader = cudf::io::chunked_parquet_reader(byte_limit, read_opts); auto num_chunks = 0; auto result = std::make_unique(); From daa3d7eca944dad95d92d8bc9889e06d119670b9 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 24 Oct 2022 10:43:03 -0700 Subject: [PATCH 085/162] Fix compile errors Signed-off-by: Nghia Truong --- java/src/main/native/src/ChunkedReaderJni.cpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/java/src/main/native/src/ChunkedReaderJni.cpp b/java/src/main/native/src/ChunkedReaderJni.cpp index e5a1813ebfe..aaddf6c743a 100644 --- a/java/src/main/native/src/ChunkedReaderJni.cpp +++ b/java/src/main/native/src/ChunkedReaderJni.cpp @@ -100,17 +100,16 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ParquetChunkedReader_create( static_cast(buffer_length)) : cudf::io::source_info(filename.get()); - // TODO: use 
builder - auto read_opts = cudf::io::chunked_parquet_reader_options::builder(source) - .byte_limit(chunk_size_byte_limit) - .build(); + auto opts_builder = cudf::io::parquet_reader_options::builder(source); if (n_filter_col_names.size() > 0) { - read_opts.set_columns(n_filter_col_names.as_cpp_vector()); + opts_builder = opts_builder.columns(n_filter_col_names.as_cpp_vector()); } - read_opts.enable_convert_strings_to_categories(false); - read_opts.set_timestamp_type(cudf::data_type(static_cast(unit))); + auto const read_opts = opts_builder.convert_strings_to_categories(false) + .timestamp_type(cudf::data_type(static_cast(unit))) + .build(); - return reinterpret_cast(new cudf::io::chunked_parquet_reader(read_opts)); + return reinterpret_cast(new cudf::io::chunked_parquet_reader( + static_cast(chunk_size_byte_limit), read_opts)); } CATCH_STD(env, 0); } From bb2fbc0506ebed6dd16810046d1f3f5284f46f56 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 24 Oct 2022 10:43:15 -0700 Subject: [PATCH 086/162] Load native deps and make the constructors public Signed-off-by: Nghia Truong --- .../java/ai/rapids/cudf/ParquetChunkedReader.java | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java b/java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java index f65c4123060..5eeb850d300 100644 --- a/java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java +++ b/java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java @@ -24,20 +24,23 @@ * TODO */ public class ParquetChunkedReader implements AutoCloseable { - private long handle; + static { + NativeDepsLoader.loadNativeDeps(); + } + /** * TODO */ - ParquetChunkedReader(long chunkSizeByteLimit, File path) { + public ParquetChunkedReader(long chunkSizeByteLimit, File path) { this(chunkSizeByteLimit, ParquetOptions.DEFAULT, path); } /** * TODO */ - ParquetChunkedReader(long chunkSizeByteLimit, ParquetOptions opts, File path) { + public ParquetChunkedReader(long chunkSizeByteLimit, ParquetOptions opts, File path) { handle = create(chunkSizeByteLimit, opts.getIncludeColumnNames(), opts.getReadBinaryAsString(), path.getAbsolutePath(), 0, 0, opts.timeUnit().typeId.getNativeId()); } @@ -50,7 +53,7 @@ public class ParquetChunkedReader implements AutoCloseable { * @param offset * @param len */ - ParquetChunkedReader(long chunkSizeByteLimit, ParquetOptions opts, HostMemoryBuffer buffer, + public ParquetChunkedReader(long chunkSizeByteLimit, ParquetOptions opts, HostMemoryBuffer buffer, long offset, long len) { handle = create(chunkSizeByteLimit, opts.getIncludeColumnNames(), opts.getReadBinaryAsString(), null, buffer.getAddress() + offset, len, opts.timeUnit().typeId.getNativeId()); @@ -83,6 +86,9 @@ public void close() { } } + + private long handle; + /** * TODO * @param chunkSizeByteLimit TODO From ef5eaee41b4c044de17179ffae7a0f7f1ea7d193 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 24 Oct 2022 11:03:36 -0700 Subject: [PATCH 087/162] Move `preprocess_file` into `reader_preprocess.cu` Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.cu | 152 ---------------------- cpp/src/io/parquet/reader_preprocess.cu | 165 ++++++++++++++++++++++-- 2 files changed, 157 insertions(+), 160 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index c8b31505751..aced3b8cb96 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1613,158 +1613,6 @@ void 
reader::impl::preprocess_file_and_columns( _file_preprocessed = true; } -std::pair reader::impl::preprocess_file( - size_type skip_rows, - size_type num_rows, - const std::vector>& row_group_list) -{ - // printf("\n\n\n\npreprocess========================\n"); - - // Select only row groups required - // Note: `skip_rows` and `num_rows` will be modified in this function. - const auto selected_row_groups = - _metadata->select_row_groups(row_group_list, skip_rows, num_rows); - - // TODO: fix this - if (selected_row_groups.size() == 0 || _input_columns.size() == 0) { - return {skip_rows, num_rows}; - } - - // TODO: fix this. - // Need to check if the file actually has data. - _file_itm_data.has_data = true; - - // Descriptors for all the chunks that make up the selected columns - const auto num_input_columns = _input_columns.size(); - const auto num_chunks = selected_row_groups.size() * num_input_columns; - _file_itm_data.chunks = hostdevice_vector(0, num_chunks, _stream); - - // Association between each column chunk and its source - std::vector chunk_source_map(num_chunks); - - // Tracker for eventually deallocating compressed and uncompressed data - _file_itm_data.raw_page_data = std::vector>(num_chunks); - - // Keep track of column chunk file offsets - std::vector column_chunk_offsets(num_chunks); - - // Initialize column chunk information - size_t total_decompressed_size = 0; - auto remaining_rows = num_rows; - std::vector> read_rowgroup_tasks; - for (const auto& rg : selected_row_groups) { - const auto& row_group = _metadata->get_row_group(rg.index, rg.source_index); - auto const row_group_start = rg.start_row; - auto const row_group_source = rg.source_index; - auto const row_group_rows = std::min(remaining_rows, row_group.num_rows); - auto const io_chunk_idx = _file_itm_data.chunks.size(); - - // generate ColumnChunkDesc objects for everything to be decoded (all input columns) - for (size_t i = 0; i < num_input_columns; ++i) { - auto col = _input_columns[i]; - // look up metadata - auto& col_meta = _metadata->get_column_metadata(rg.index, rg.source_index, col.schema_idx); - auto& schema = _metadata->get_schema(col.schema_idx); - - auto [type_width, clock_rate, converted_type] = - conversion_info(to_type_id(schema, _strings_to_categorical, _timestamp_type.id()), - _timestamp_type.id(), - schema.type, - schema.converted_type, - schema.type_length); - - column_chunk_offsets[_file_itm_data.chunks.size()] = - (col_meta.dictionary_page_offset != 0) - ? 
std::min(col_meta.data_page_offset, col_meta.dictionary_page_offset) - : col_meta.data_page_offset; - - _file_itm_data.chunks.push_back( - gpu::ColumnChunkDesc(col_meta.total_compressed_size, - nullptr, - col_meta.num_values, - schema.type, - type_width, - row_group_start, - row_group_rows, - schema.max_definition_level, - schema.max_repetition_level, - _metadata->get_output_nesting_depth(col.schema_idx), - required_bits(schema.max_definition_level), - required_bits(schema.max_repetition_level), - col_meta.codec, - converted_type, - schema.logical_type, - schema.decimal_scale, - clock_rate, - i, - col.schema_idx)); - - // Map each column chunk to its column index and its source index - chunk_source_map[_file_itm_data.chunks.size() - 1] = row_group_source; - - if (col_meta.codec != Compression::UNCOMPRESSED) { - total_decompressed_size += col_meta.total_uncompressed_size; - } - } - // Read compressed chunk data to device memory - read_rowgroup_tasks.push_back(read_column_chunks(_file_itm_data.raw_page_data, - _file_itm_data.chunks, - io_chunk_idx, - _file_itm_data.chunks.size(), - column_chunk_offsets, - chunk_source_map)); - - remaining_rows -= row_group.num_rows; - } - for (auto& task : read_rowgroup_tasks) { - task.wait(); - } - assert(remaining_rows <= 0); - - // Process dataset chunk pages into output columns - const auto total_pages = count_page_headers(_file_itm_data.chunks); - _file_itm_data.pages_info = hostdevice_vector(total_pages, total_pages, _stream); - - if (total_pages > 0) { - // decoding of column/page information - decode_page_headers(_file_itm_data.chunks, _file_itm_data.pages_info); - if (total_decompressed_size > 0) { - _file_itm_data.decomp_page_data = - decompress_page_data(_file_itm_data.chunks, _file_itm_data.pages_info); - // Free compressed data - for (size_t c = 0; c < _file_itm_data.chunks.size(); c++) { - if (_file_itm_data.chunks[c].codec != parquet::Compression::UNCOMPRESSED) { - _file_itm_data.raw_page_data[c].reset(); - // TODO: Check if this is called - } - } - } - - // build output column info - // walk the schema, building out_buffers that mirror what our final cudf columns will look - // like. important : there is not necessarily a 1:1 mapping between input columns and output - // columns. For example, parquet does not explicitly store a ColumnChunkDesc for struct - // columns. The "structiness" is simply implied by the schema. For example, this schema: - // required group field_id=1 name { - // required binary field_id=2 firstname (String); - // required binary field_id=3 middlename (String); - // required binary field_id=4 lastname (String); - // } - // will only contain 3 columns of data (firstname, middlename, lastname). But of course - // "name" is a struct column that we want to return, so we have to make sure that we - // create it ourselves. 
- // std::vector output_info = build_output_column_info(); - - // nesting information (sizes, etc) stored -per page- - // note : even for flat schemas, we allocate 1 level of "nesting" info - - allocate_nesting_info( - _file_itm_data.chunks, _file_itm_data.pages_info, _file_itm_data.page_nesting_info); - } - - return {skip_rows, num_rows}; -} - table_with_metadata reader::impl::read_chunk_internal(bool uses_custom_row_bounds) { table_metadata out_metadata; diff --git a/cpp/src/io/parquet/reader_preprocess.cu b/cpp/src/io/parquet/reader_preprocess.cu index 9a6a2388da8..29c3f7635af 100644 --- a/cpp/src/io/parquet/reader_preprocess.cu +++ b/cpp/src/io/parquet/reader_preprocess.cu @@ -415,9 +415,6 @@ struct start_offset_output_iterator { } // anonymous namespace -/** - * @copydoc cudf::io::detail::parquet::preprocess_columns - */ void reader::impl::preprocess_columns(hostdevice_vector& chunks, hostdevice_vector& pages, size_t min_row, @@ -492,9 +489,9 @@ void reader::impl::preprocess_columns(hostdevice_vector& c // intermediate data we will need for further chunked reads if (has_lists || chunked_read_size > 0) { // computes: - // PageNestingInfo::num_rows for each page. the true number of rows (taking repetition into account), not - // just the number of values. - // PageNestingInfo::size for each level of nesting, for each page. + // PageNestingInfo::num_rows for each page. the true number of rows (taking repetition into + // account), not just the number of values. PageNestingInfo::size for each level of nesting, for + // each page. // // we will be applying a later "trim" pass if skip_rows/num_rows is being used, which can happen // if: @@ -502,7 +499,7 @@ void reader::impl::preprocess_columns(hostdevice_vector& c // - if we will be doing a chunked read gpu::ComputePageSizes(pages, chunks, - 0, // 0-max size_t. process all possible rows + 0, // 0-max size_t. process all possible rows std::numeric_limits::max(), true, // compute num_rows chunked_read_size > 0, // compute string sizes @@ -579,7 +576,7 @@ void reader::impl::allocate_columns(hostdevice_vector& chu { // computes: // PageNestingInfo::size for each level of nesting, for each page, taking row bounds into account. - // PageInfo::skipped_values, which tells us where to start decoding in the input to respect the + // PageInfo::skipped_values, which tells us where to start decoding in the input to respect the // user bounds. // It is only necessary to do this second pass if uses_custom_row_bounds is set (if the user has // specified artifical bounds). @@ -676,4 +673,156 @@ void reader::impl::allocate_columns(hostdevice_vector& chu } } +std::pair reader::impl::preprocess_file( + size_type skip_rows, + size_type num_rows, + const std::vector>& row_group_list) +{ + // printf("\n\n\n\npreprocess========================\n"); + + // Select only row groups required + // Note: `skip_rows` and `num_rows` will be modified in this function. + const auto selected_row_groups = + _metadata->select_row_groups(row_group_list, skip_rows, num_rows); + + // TODO: fix this + if (selected_row_groups.size() == 0 || _input_columns.size() == 0) { + return {skip_rows, num_rows}; + } + + // TODO: fix this. + // Need to check if the file actually has data. 
+ _file_itm_data.has_data = true; + + // Descriptors for all the chunks that make up the selected columns + const auto num_input_columns = _input_columns.size(); + const auto num_chunks = selected_row_groups.size() * num_input_columns; + _file_itm_data.chunks = hostdevice_vector(0, num_chunks, _stream); + + // Association between each column chunk and its source + std::vector chunk_source_map(num_chunks); + + // Tracker for eventually deallocating compressed and uncompressed data + _file_itm_data.raw_page_data = std::vector>(num_chunks); + + // Keep track of column chunk file offsets + std::vector column_chunk_offsets(num_chunks); + + // Initialize column chunk information + size_t total_decompressed_size = 0; + auto remaining_rows = num_rows; + std::vector> read_rowgroup_tasks; + for (const auto& rg : selected_row_groups) { + const auto& row_group = _metadata->get_row_group(rg.index, rg.source_index); + auto const row_group_start = rg.start_row; + auto const row_group_source = rg.source_index; + auto const row_group_rows = std::min(remaining_rows, row_group.num_rows); + auto const io_chunk_idx = _file_itm_data.chunks.size(); + + // generate ColumnChunkDesc objects for everything to be decoded (all input columns) + for (size_t i = 0; i < num_input_columns; ++i) { + auto col = _input_columns[i]; + // look up metadata + auto& col_meta = _metadata->get_column_metadata(rg.index, rg.source_index, col.schema_idx); + auto& schema = _metadata->get_schema(col.schema_idx); + + auto [type_width, clock_rate, converted_type] = + conversion_info(to_type_id(schema, _strings_to_categorical, _timestamp_type.id()), + _timestamp_type.id(), + schema.type, + schema.converted_type, + schema.type_length); + + column_chunk_offsets[_file_itm_data.chunks.size()] = + (col_meta.dictionary_page_offset != 0) + ? 
std::min(col_meta.data_page_offset, col_meta.dictionary_page_offset) + : col_meta.data_page_offset; + + _file_itm_data.chunks.push_back( + gpu::ColumnChunkDesc(col_meta.total_compressed_size, + nullptr, + col_meta.num_values, + schema.type, + type_width, + row_group_start, + row_group_rows, + schema.max_definition_level, + schema.max_repetition_level, + _metadata->get_output_nesting_depth(col.schema_idx), + required_bits(schema.max_definition_level), + required_bits(schema.max_repetition_level), + col_meta.codec, + converted_type, + schema.logical_type, + schema.decimal_scale, + clock_rate, + i, + col.schema_idx)); + + // Map each column chunk to its column index and its source index + chunk_source_map[_file_itm_data.chunks.size() - 1] = row_group_source; + + if (col_meta.codec != Compression::UNCOMPRESSED) { + total_decompressed_size += col_meta.total_uncompressed_size; + } + } + // Read compressed chunk data to device memory + read_rowgroup_tasks.push_back(read_column_chunks(_file_itm_data.raw_page_data, + _file_itm_data.chunks, + io_chunk_idx, + _file_itm_data.chunks.size(), + column_chunk_offsets, + chunk_source_map)); + + remaining_rows -= row_group.num_rows; + } + for (auto& task : read_rowgroup_tasks) { + task.wait(); + } + assert(remaining_rows <= 0); + + // Process dataset chunk pages into output columns + const auto total_pages = count_page_headers(_file_itm_data.chunks); + _file_itm_data.pages_info = hostdevice_vector(total_pages, total_pages, _stream); + + if (total_pages > 0) { + // decoding of column/page information + decode_page_headers(_file_itm_data.chunks, _file_itm_data.pages_info); + if (total_decompressed_size > 0) { + _file_itm_data.decomp_page_data = + decompress_page_data(_file_itm_data.chunks, _file_itm_data.pages_info); + // Free compressed data + for (size_t c = 0; c < _file_itm_data.chunks.size(); c++) { + if (_file_itm_data.chunks[c].codec != parquet::Compression::UNCOMPRESSED) { + _file_itm_data.raw_page_data[c].reset(); + // TODO: Check if this is called + } + } + } + + // build output column info + // walk the schema, building out_buffers that mirror what our final cudf columns will look + // like. important : there is not necessarily a 1:1 mapping between input columns and output + // columns. For example, parquet does not explicitly store a ColumnChunkDesc for struct + // columns. The "structiness" is simply implied by the schema. For example, this schema: + // required group field_id=1 name { + // required binary field_id=2 firstname (String); + // required binary field_id=3 middlename (String); + // required binary field_id=4 lastname (String); + // } + // will only contain 3 columns of data (firstname, middlename, lastname). But of course + // "name" is a struct column that we want to return, so we have to make sure that we + // create it ourselves. 
+ // std::vector output_info = build_output_column_info(); + + // nesting information (sizes, etc) stored -per page- + // note : even for flat schemas, we allocate 1 level of "nesting" info + + allocate_nesting_info( + _file_itm_data.chunks, _file_itm_data.pages_info, _file_itm_data.page_nesting_info); + } + + return {skip_rows, num_rows}; +} + } // namespace cudf::io::detail::parquet From d19260db41040e346b6ebbe18fe1507ba9f2dac7 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 24 Oct 2022 12:12:53 -0700 Subject: [PATCH 088/162] Move common implementation into `reader_impl_helpers.*` Signed-off-by: Nghia Truong --- cpp/CMakeLists.txt | 3 +- cpp/src/io/parquet/reader_impl.cu | 800 +----------------- cpp/src/io/parquet/reader_impl_helpers.cu | 708 ++++++++++++++++ cpp/src/io/parquet/reader_impl_helpers.cuh | 264 ++++++ ...reprocess.cu => reader_impl_preprocess.cu} | 1 + 5 files changed, 976 insertions(+), 800 deletions(-) create mode 100644 cpp/src/io/parquet/reader_impl_helpers.cu create mode 100644 cpp/src/io/parquet/reader_impl_helpers.cuh rename cpp/src/io/parquet/{reader_preprocess.cu => reader_impl_preprocess.cu} (99%) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 5c98f7c91dc..1c6fde0a11a 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -350,7 +350,8 @@ add_library( src/io/parquet/page_enc.cu src/io/parquet/page_hdr.cu src/io/parquet/reader_impl.cu - src/io/parquet/reader_preprocess.cu + src/io/parquet/reader_impl_helpers.cu + src/io/parquet/reader_impl_preprocess.cu src/io/parquet/writer_impl.cu src/io/statistics/orc_column_statistics.cu src/io/statistics/parquet_column_statistics.cu diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index aced3b8cb96..5229c0dc58b 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -20,6 +20,7 @@ */ #include "reader_impl.hpp" +#include "reader_impl_helpers.cuh" #include "compact_protocol_reader.hpp" @@ -59,805 +60,6 @@ namespace parquet { using namespace cudf::io::parquet; using namespace cudf::io; -namespace { - -parquet::ConvertedType logical_type_to_converted_type(parquet::LogicalType const& logical) -{ - if (logical.isset.STRING) { - return parquet::UTF8; - } else if (logical.isset.MAP) { - return parquet::MAP; - } else if (logical.isset.LIST) { - return parquet::LIST; - } else if (logical.isset.ENUM) { - return parquet::ENUM; - } else if (logical.isset.DECIMAL) { - return parquet::DECIMAL; // TODO set decimal values - } else if (logical.isset.DATE) { - return parquet::DATE; - } else if (logical.isset.TIME) { - if (logical.TIME.unit.isset.MILLIS) - return parquet::TIME_MILLIS; - else if (logical.TIME.unit.isset.MICROS) - return parquet::TIME_MICROS; - } else if (logical.isset.TIMESTAMP) { - if (logical.TIMESTAMP.unit.isset.MILLIS) - return parquet::TIMESTAMP_MILLIS; - else if (logical.TIMESTAMP.unit.isset.MICROS) - return parquet::TIMESTAMP_MICROS; - } else if (logical.isset.INTEGER) { - switch (logical.INTEGER.bitWidth) { - case 8: return logical.INTEGER.isSigned ? INT_8 : UINT_8; - case 16: return logical.INTEGER.isSigned ? INT_16 : UINT_16; - case 32: return logical.INTEGER.isSigned ? INT_32 : UINT_32; - case 64: return logical.INTEGER.isSigned ? 
INT_64 : UINT_64; - default: break; - } - } else if (logical.isset.UNKNOWN) { - return parquet::NA; - } else if (logical.isset.JSON) { - return parquet::JSON; - } else if (logical.isset.BSON) { - return parquet::BSON; - } - return parquet::UNKNOWN; -} - -/** - * @brief Function that translates Parquet datatype to cuDF type enum - */ -type_id to_type_id(SchemaElement const& schema, - bool strings_to_categorical, - type_id timestamp_type_id) -{ - parquet::Type const physical = schema.type; - parquet::LogicalType const logical_type = schema.logical_type; - parquet::ConvertedType converted_type = schema.converted_type; - int32_t decimal_scale = schema.decimal_scale; - - // Logical type used for actual data interpretation; the legacy converted type - // is superceded by 'logical' type whenever available. - auto const inferred_converted_type = logical_type_to_converted_type(logical_type); - if (inferred_converted_type != parquet::UNKNOWN) converted_type = inferred_converted_type; - if (inferred_converted_type == parquet::DECIMAL && decimal_scale == 0) - decimal_scale = schema.logical_type.DECIMAL.scale; - - switch (converted_type) { - case parquet::UINT_8: return type_id::UINT8; - case parquet::INT_8: return type_id::INT8; - case parquet::UINT_16: return type_id::UINT16; - case parquet::INT_16: return type_id::INT16; - case parquet::UINT_32: return type_id::UINT32; - case parquet::UINT_64: return type_id::UINT64; - case parquet::DATE: return type_id::TIMESTAMP_DAYS; - case parquet::TIME_MILLIS: - return (timestamp_type_id != type_id::EMPTY) ? timestamp_type_id - : type_id::DURATION_MILLISECONDS; - case parquet::TIME_MICROS: - return (timestamp_type_id != type_id::EMPTY) ? timestamp_type_id - : type_id::DURATION_MICROSECONDS; - case parquet::TIMESTAMP_MILLIS: - return (timestamp_type_id != type_id::EMPTY) ? timestamp_type_id - : type_id::TIMESTAMP_MILLISECONDS; - case parquet::TIMESTAMP_MICROS: - return (timestamp_type_id != type_id::EMPTY) ? timestamp_type_id - : type_id::TIMESTAMP_MICROSECONDS; - case parquet::DECIMAL: - if (physical == parquet::INT32) { return type_id::DECIMAL32; } - if (physical == parquet::INT64) { return type_id::DECIMAL64; } - if (physical == parquet::FIXED_LEN_BYTE_ARRAY) { - if (schema.type_length <= static_cast(sizeof(int32_t))) { - return type_id::DECIMAL32; - } - if (schema.type_length <= static_cast(sizeof(int64_t))) { - return type_id::DECIMAL64; - } - if (schema.type_length <= static_cast(sizeof(__int128_t))) { - return type_id::DECIMAL128; - } - } - CUDF_FAIL("Invalid representation of decimal type"); - break; - - // maps are just List>. - case parquet::MAP: - case parquet::LIST: return type_id::LIST; - case parquet::NA: return type_id::STRING; - // return type_id::EMPTY; //TODO(kn): enable after Null/Empty column support - default: break; - } - - if (inferred_converted_type == parquet::UNKNOWN and physical == parquet::INT64 and - logical_type.TIMESTAMP.unit.isset.NANOS) { - return (timestamp_type_id != type_id::EMPTY) ? timestamp_type_id - : type_id::TIMESTAMP_NANOSECONDS; - } - - // is it simply a struct? - if (schema.is_struct()) { return type_id::STRUCT; } - - // Physical storage type supported by Parquet; controls the on-disk storage - // format in combination with the encoding type. 
- switch (physical) { - case parquet::BOOLEAN: return type_id::BOOL8; - case parquet::INT32: return type_id::INT32; - case parquet::INT64: return type_id::INT64; - case parquet::FLOAT: return type_id::FLOAT32; - case parquet::DOUBLE: return type_id::FLOAT64; - case parquet::BYTE_ARRAY: - case parquet::FIXED_LEN_BYTE_ARRAY: - // Can be mapped to INT32 (32-bit hash) or STRING - return strings_to_categorical ? type_id::INT32 : type_id::STRING; - case parquet::INT96: - return (timestamp_type_id != type_id::EMPTY) ? timestamp_type_id - : type_id::TIMESTAMP_NANOSECONDS; - default: break; - } - - return type_id::EMPTY; -} - -/** - * @brief Converts cuDF type enum to column logical type - */ -data_type to_data_type(type_id t_id, SchemaElement const& schema) -{ - return t_id == type_id::DECIMAL32 || t_id == type_id::DECIMAL64 || t_id == type_id::DECIMAL128 - ? data_type{t_id, numeric::scale_type{-schema.decimal_scale}} - : data_type{t_id}; -} - -/** - * @brief Function that returns the required the number of bits to store a value - */ -template -T required_bits(uint32_t max_level) -{ - return static_cast(CompactProtocolReader::NumRequiredBits(max_level)); -} - -/** - * @brief Converts cuDF units to Parquet units. - * - * @return A tuple of Parquet type width, Parquet clock rate and Parquet decimal type. - */ -std::tuple conversion_info(type_id column_type_id, - type_id timestamp_type_id, - parquet::Type physical, - int8_t converted, - int32_t length) -{ - int32_t type_width = (physical == parquet::FIXED_LEN_BYTE_ARRAY) ? length : 0; - int32_t clock_rate = 0; - if (column_type_id == type_id::INT8 or column_type_id == type_id::UINT8) { - type_width = 1; // I32 -> I8 - } else if (column_type_id == type_id::INT16 or column_type_id == type_id::UINT16) { - type_width = 2; // I32 -> I16 - } else if (column_type_id == type_id::INT32) { - type_width = 4; // str -> hash32 - } else if (is_chrono(data_type{column_type_id})) { - clock_rate = to_clockrate(timestamp_type_id); - } - - int8_t converted_type = converted; - if (converted_type == parquet::DECIMAL && column_type_id != type_id::FLOAT64 && - not cudf::is_fixed_point(data_type{column_type_id})) { - converted_type = parquet::UNKNOWN; // Not converting to float64 or decimal - } - return std::make_tuple(type_width, clock_rate, converted_type); -} - -inline void decompress_check(device_span results, - rmm::cuda_stream_view stream) -{ - CUDF_EXPECTS(thrust::all_of(rmm::exec_policy(stream), - results.begin(), - results.end(), - [] __device__(auto const& res) { - return res.status == compression_status::SUCCESS; - }), - "Error during decompression"); -} -} // namespace - -std::string name_from_path(const std::vector& path_in_schema) -{ - // For the case of lists, we will see a schema that looks like: - // a.list.element.list.element - // where each (list.item) pair represents a level of nesting. According to the parquet spec, - // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md - // the initial field must be named "list" and the inner element must be named "element". - // If we are dealing with a list, we want to return the topmost name of the group ("a"). - // - // For other nested schemas, like structs we just want to return the bottom-most name. For - // example a struct with the schema - // b.employee.id, the column representing "id" should simply be named "id". - // - // In short, this means : return the highest level of the schema that does not have list - // definitions underneath it. 
- // - std::string s = (path_in_schema.size() > 0) ? path_in_schema[0] : ""; - for (size_t i = 1; i < path_in_schema.size(); i++) { - // The Parquet spec requires that the outer schema field is named "list". However it also - // provides a list of backwards compatibility cases that are applicable as well. Currently - // we are only handling the formal spec. This will get cleaned up and improved when we add - // support for structs. The correct thing to do will probably be to examine the type of - // the SchemaElement itself to concretely identify the start of a nested type of any kind rather - // than trying to derive it from the path string. - if (path_in_schema[i] == "list") { - // Again, strictly speaking, the Parquet spec says the inner field should be named - // "element", but there are some backwards compatibility issues that we have seen in the - // wild. For example, Pandas calls the field "item". We will allow any name for now. - i++; - continue; - } - // otherwise, we've got a real nested column. update the name - s = path_in_schema[i]; - } - return s; -} - -/** - * @brief Class for parsing dataset metadata - */ -struct metadata : public FileMetaData { - explicit metadata(datasource* source) - { - constexpr auto header_len = sizeof(file_header_s); - constexpr auto ender_len = sizeof(file_ender_s); - - const auto len = source->size(); - const auto header_buffer = source->host_read(0, header_len); - const auto header = reinterpret_cast(header_buffer->data()); - const auto ender_buffer = source->host_read(len - ender_len, ender_len); - const auto ender = reinterpret_cast(ender_buffer->data()); - CUDF_EXPECTS(len > header_len + ender_len, "Incorrect data source"); - CUDF_EXPECTS(header->magic == parquet_magic && ender->magic == parquet_magic, - "Corrupted header or footer"); - CUDF_EXPECTS(ender->footer_len != 0 && ender->footer_len <= (len - header_len - ender_len), - "Incorrect footer length"); - - const auto buffer = source->host_read(len - ender->footer_len - ender_len, ender->footer_len); - CompactProtocolReader cp(buffer->data(), ender->footer_len); - CUDF_EXPECTS(cp.read(this), "Cannot parse metadata"); - CUDF_EXPECTS(cp.InitSchema(this), "Cannot initialize schema"); - } -}; - -class aggregate_reader_metadata { - std::vector per_file_metadata; - std::vector> keyval_maps; - size_type num_rows; - size_type num_row_groups; - /** - * @brief Create a metadata object from each element in the source vector - */ - auto metadatas_from_sources(std::vector> const& sources) - { - std::vector metadatas; - std::transform( - sources.cbegin(), sources.cend(), std::back_inserter(metadatas), [](auto const& source) { - return metadata(source.get()); - }); - return metadatas; - } - - /** - * @brief Collect the keyvalue maps from each per-file metadata object into a vector of maps. 
- */ - [[nodiscard]] auto collect_keyval_metadata() - { - std::vector> kv_maps; - std::transform(per_file_metadata.cbegin(), - per_file_metadata.cend(), - std::back_inserter(kv_maps), - [](auto const& pfm) { - std::unordered_map kv_map; - std::transform(pfm.key_value_metadata.cbegin(), - pfm.key_value_metadata.cend(), - std::inserter(kv_map, kv_map.end()), - [](auto const& kv) { - return std::pair{kv.key, kv.value}; - }); - return kv_map; - }); - - return kv_maps; - } - - /** - * @brief Sums up the number of rows of each source - */ - [[nodiscard]] size_type calc_num_rows() const - { - return std::accumulate( - per_file_metadata.begin(), per_file_metadata.end(), 0, [](auto& sum, auto& pfm) { - return sum + pfm.num_rows; - }); - } - - /** - * @brief Sums up the number of row groups of each source - */ - [[nodiscard]] size_type calc_num_row_groups() const - { - return std::accumulate( - per_file_metadata.begin(), per_file_metadata.end(), 0, [](auto& sum, auto& pfm) { - return sum + pfm.row_groups.size(); - }); - } - - public: - aggregate_reader_metadata(std::vector> const& sources) - : per_file_metadata(metadatas_from_sources(sources)), - keyval_maps(collect_keyval_metadata()), - num_rows(calc_num_rows()), - num_row_groups(calc_num_row_groups()) - { - // Verify that the input files have matching numbers of columns - size_type num_cols = -1; - for (auto const& pfm : per_file_metadata) { - if (pfm.row_groups.size() != 0) { - if (num_cols == -1) - num_cols = pfm.row_groups[0].columns.size(); - else - CUDF_EXPECTS(num_cols == static_cast(pfm.row_groups[0].columns.size()), - "All sources must have the same number of columns"); - } - } - // Verify that the input files have matching schemas - for (auto const& pfm : per_file_metadata) { - CUDF_EXPECTS(per_file_metadata[0].schema == pfm.schema, - "All sources must have the same schemas"); - } - } - - [[nodiscard]] auto const& get_row_group(size_type row_group_index, size_type src_idx) const - { - CUDF_EXPECTS(src_idx >= 0 && src_idx < static_cast(per_file_metadata.size()), - "invalid source index"); - return per_file_metadata[src_idx].row_groups[row_group_index]; - } - - [[nodiscard]] auto const& get_column_metadata(size_type row_group_index, - size_type src_idx, - int schema_idx) const - { - auto col = std::find_if( - per_file_metadata[src_idx].row_groups[row_group_index].columns.begin(), - per_file_metadata[src_idx].row_groups[row_group_index].columns.end(), - [schema_idx](ColumnChunk const& col) { return col.schema_idx == schema_idx ? 
true : false; }); - CUDF_EXPECTS(col != std::end(per_file_metadata[src_idx].row_groups[row_group_index].columns), - "Found no metadata for schema index"); - return col->meta_data; - } - - [[nodiscard]] auto get_num_rows() const { return num_rows; } - - [[nodiscard]] auto get_num_row_groups() const { return num_row_groups; } - - [[nodiscard]] auto const& get_schema(int schema_idx) const - { - return per_file_metadata[0].schema[schema_idx]; - } - - [[nodiscard]] auto const& get_key_value_metadata() const { return keyval_maps; } - - /** - * @brief Gets the concrete nesting depth of output cudf columns - * - * @param schema_index Schema index of the input column - * - * @return comma-separated index column names in quotes - */ - [[nodiscard]] inline int get_output_nesting_depth(int schema_index) const - { - auto& pfm = per_file_metadata[0]; - int depth = 0; - - // walk upwards, skipping repeated fields - while (schema_index > 0) { - if (!pfm.schema[schema_index].is_stub()) { depth++; } - // schema of one-level encoding list doesn't contain nesting information, so we need to - // manually add an extra nesting level - if (pfm.schema[schema_index].is_one_level_list()) { depth++; } - schema_index = pfm.schema[schema_index].parent_idx; - } - return depth; - } - - /** - * @brief Extracts the pandas "index_columns" section - * - * PANDAS adds its own metadata to the key_value section when writing out the - * dataframe to a file to aid in exact reconstruction. The JSON-formatted - * metadata contains the index column(s) and PANDA-specific datatypes. - * - * @return comma-separated index column names in quotes - */ - [[nodiscard]] std::string get_pandas_index() const - { - // Assumes that all input files have the same metadata - // TODO: verify this assumption - auto it = keyval_maps[0].find("pandas"); - if (it != keyval_maps[0].end()) { - // Captures a list of quoted strings found inside square brackets after `"index_columns":` - // Inside quotes supports newlines, brackets, escaped quotes, etc. - // One-liner regex: - // "index_columns"\s*:\s*\[\s*((?:"(?:|(?:.*?(?![^\\]")).?)[^\\]?",?\s*)*)\] - // Documented below. 
- std::regex index_columns_expr{ - R"("index_columns"\s*:\s*\[\s*)" // match preamble, opening square bracket, whitespace - R"(()" // Open first capturing group - R"((?:")" // Open non-capturing group match opening quote - R"((?:|(?:.*?(?![^\\]")).?))" // match empty string or anything between quotes - R"([^\\]?")" // Match closing non-escaped quote - R"(,?\s*)" // Match optional comma and whitespace - R"()*)" // Close non-capturing group and repeat 0 or more times - R"())" // Close first capturing group - R"(\])" // Match closing square brackets - }; - std::smatch sm; - if (std::regex_search(it->second, sm, index_columns_expr)) { return sm[1].str(); } - } - return ""; - } - - /** - * @brief Extracts the column name(s) used for the row indexes in a dataframe - * - * @param names List of column names to load, where index column name(s) will be added - */ - [[nodiscard]] std::vector get_pandas_index_names() const - { - std::vector names; - auto str = get_pandas_index(); - if (str.length() != 0) { - std::regex index_name_expr{R"(\"((?:\\.|[^\"])*)\")"}; - std::smatch sm; - while (std::regex_search(str, sm, index_name_expr)) { - if (sm.size() == 2) { // 2 = whole match, first item - if (std::find(names.begin(), names.end(), sm[1].str()) == names.end()) { - std::regex esc_quote{R"(\\")"}; - names.emplace_back(std::regex_replace(sm[1].str(), esc_quote, R"(")")); - } - } - str = sm.suffix(); - } - } - return names; - } - - struct row_group_info { - size_type const index; - size_t const start_row; // TODO source index - size_type const source_index; - row_group_info(size_type index, size_t start_row, size_type source_index) - : index(index), start_row(start_row), source_index(source_index) - { - } - }; - - /** - * @brief Filters and reduces down to a selection of row groups - * - * @param row_groups Lists of row groups to read, one per source - * @param row_start Starting row of the selection - * @param row_count Total number of rows selected - * - * @return List of row group indexes and its starting row - */ - [[nodiscard]] auto select_row_groups(std::vector> const& row_groups, - size_type& row_start, - size_type& row_count) const - { - if (!row_groups.empty()) { - std::vector selection; - CUDF_EXPECTS(row_groups.size() == per_file_metadata.size(), - "Must specify row groups for each source"); - - row_count = 0; - for (size_t src_idx = 0; src_idx < row_groups.size(); ++src_idx) { - for (auto const& rowgroup_idx : row_groups[src_idx]) { - CUDF_EXPECTS( - rowgroup_idx >= 0 && - rowgroup_idx < static_cast(per_file_metadata[src_idx].row_groups.size()), - "Invalid rowgroup index"); - selection.emplace_back(rowgroup_idx, row_count, src_idx); - row_count += get_row_group(rowgroup_idx, src_idx).num_rows; - } - } - return selection; - } - - row_start = std::max(row_start, 0); - if (row_count < 0) { - row_count = static_cast( - std::min(get_num_rows(), std::numeric_limits::max())); - } - row_count = min(row_count, get_num_rows() - row_start); - CUDF_EXPECTS(row_count >= 0, "Invalid row count"); - CUDF_EXPECTS(row_start <= get_num_rows(), "Invalid row start"); - - std::vector selection; - size_type count = 0; - for (size_t src_idx = 0; src_idx < per_file_metadata.size(); ++src_idx) { - for (size_t rg_idx = 0; rg_idx < per_file_metadata[src_idx].row_groups.size(); ++rg_idx) { - auto const chunk_start_row = count; - count += get_row_group(rg_idx, src_idx).num_rows; - if (count > row_start || count == 0) { - selection.emplace_back(rg_idx, chunk_start_row, src_idx); - } - if (count >= row_start + row_count) { 
break; } - } - } - - return selection; - } - - /** - * @brief Filters and reduces down to a selection of columns - * - * @param use_names List of paths of column names to select; `nullopt` if user did not select - * columns to read - * @param include_index Whether to always include the PANDAS index column(s) - * @param strings_to_categorical Type conversion parameter - * @param timestamp_type_id Type conversion parameter - * - * @return input column information, output column information, list of output column schema - * indices - */ - [[nodiscard]] auto select_columns(std::optional> const& use_names, - bool include_index, - bool strings_to_categorical, - type_id timestamp_type_id) const - { - auto find_schema_child = [&](SchemaElement const& schema_elem, std::string const& name) { - auto const& col_schema_idx = std::find_if( - schema_elem.children_idx.cbegin(), - schema_elem.children_idx.cend(), - [&](size_t col_schema_idx) { return get_schema(col_schema_idx).name == name; }); - - return (col_schema_idx != schema_elem.children_idx.end()) ? static_cast(*col_schema_idx) - : -1; - }; - - std::vector output_columns; - std::vector input_columns; - std::vector nesting; - - // Return true if column path is valid. e.g. if the path is {"struct1", "child1"}, then it is - // valid if "struct1.child1" exists in this file's schema. If "struct1" exists but "child1" is - // not a child of "struct1" then the function will return false for "struct1" - std::function&, bool)> - build_column = [&](column_name_info const* col_name_info, - int schema_idx, - std::vector& out_col_array, - bool has_list_parent) { - if (schema_idx < 0) { return false; } - auto const& schema_elem = get_schema(schema_idx); - - // if schema_elem is a stub then it does not exist in the column_name_info and column_buffer - // hierarchy. So continue on - if (schema_elem.is_stub()) { - // is this legit? - CUDF_EXPECTS(schema_elem.num_children == 1, "Unexpected number of children for stub"); - auto child_col_name_info = (col_name_info) ? &col_name_info->children[0] : nullptr; - return build_column( - child_col_name_info, schema_elem.children_idx[0], out_col_array, has_list_parent); - } - - // if we're at the root, this is a new output column - auto const col_type = - schema_elem.is_one_level_list() - ? type_id::LIST - : to_type_id(schema_elem, strings_to_categorical, timestamp_type_id); - auto const dtype = to_data_type(col_type, schema_elem); - - column_buffer output_col(dtype, schema_elem.repetition_type == OPTIONAL); - if (has_list_parent) { output_col.user_data |= PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT; } - // store the index of this element if inserted in out_col_array - nesting.push_back(static_cast(out_col_array.size())); - output_col.name = schema_elem.name; - - // build each child - bool path_is_valid = false; - if (col_name_info == nullptr or col_name_info->children.empty()) { - // add all children of schema_elem. 
- // At this point, we can no longer pass a col_name_info to build_column - for (int idx = 0; idx < schema_elem.num_children; idx++) { - path_is_valid |= build_column(nullptr, - schema_elem.children_idx[idx], - output_col.children, - has_list_parent || col_type == type_id::LIST); - } - } else { - for (size_t idx = 0; idx < col_name_info->children.size(); idx++) { - path_is_valid |= - build_column(&col_name_info->children[idx], - find_schema_child(schema_elem, col_name_info->children[idx].name), - output_col.children, - has_list_parent || col_type == type_id::LIST); - } - } - - // if I have no children, we're at a leaf and I'm an input column (that is, one with actual - // data stored) so add me to the list. - if (schema_elem.num_children == 0) { - input_column_info& input_col = input_columns.emplace_back( - input_column_info{schema_idx, schema_elem.name, schema_elem.max_repetition_level > 0}); - - // set up child output column for one-level encoding list - if (schema_elem.is_one_level_list()) { - // determine the element data type - auto const element_type = - to_type_id(schema_elem, strings_to_categorical, timestamp_type_id); - auto const element_dtype = to_data_type(element_type, schema_elem); - - column_buffer element_col(element_dtype, schema_elem.repetition_type == OPTIONAL); - if (has_list_parent || col_type == type_id::LIST) { - element_col.user_data |= PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT; - } - // store the index of this element - nesting.push_back(static_cast(output_col.children.size())); - // TODO: not sure if we should assign a name or leave it blank - element_col.name = "element"; - - output_col.children.push_back(std::move(element_col)); - } - - std::copy(nesting.cbegin(), nesting.cend(), std::back_inserter(input_col.nesting)); - - // pop off the extra nesting element. - if (schema_elem.is_one_level_list()) { nesting.pop_back(); } - - path_is_valid = true; // If we're able to reach leaf then path is valid - } - - if (path_is_valid) { out_col_array.push_back(std::move(output_col)); } - - nesting.pop_back(); - return path_is_valid; - }; - - std::vector output_column_schemas; - - // - // there is not necessarily a 1:1 mapping between input columns and output columns. - // For example, parquet does not explicitly store a ColumnChunkDesc for struct columns. - // The "structiness" is simply implied by the schema. For example, this schema: - // required group field_id=1 name { - // required binary field_id=2 firstname (String); - // required binary field_id=3 middlename (String); - // required binary field_id=4 lastname (String); - // } - // will only contain 3 internal columns of data (firstname, middlename, lastname). But of - // course "name" is ultimately the struct column we want to return. - // - // "firstname", "middlename" and "lastname" represent the input columns in the file that we - // process to produce the final cudf "name" column. - // - // A user can ask for a single field out of the struct e.g. firstname. 
- // In this case they'll pass a fully qualified name to the schema element like - // ["name", "firstname"] - // - auto const& root = get_schema(0); - if (not use_names.has_value()) { - for (auto const& schema_idx : root.children_idx) { - build_column(nullptr, schema_idx, output_columns, false); - output_column_schemas.push_back(schema_idx); - } - } else { - struct path_info { - std::string full_path; - int schema_idx; - }; - - // Convert schema into a vector of every possible path - std::vector all_paths; - std::function add_path = [&](std::string path_till_now, - int schema_idx) { - auto const& schema_elem = get_schema(schema_idx); - std::string curr_path = path_till_now + schema_elem.name; - all_paths.push_back({curr_path, schema_idx}); - for (auto const& child_idx : schema_elem.children_idx) { - add_path(curr_path + ".", child_idx); - } - }; - for (auto const& child_idx : get_schema(0).children_idx) { - add_path("", child_idx); - } - - // Find which of the selected paths are valid and get their schema index - std::vector valid_selected_paths; - for (auto const& selected_path : *use_names) { - auto found_path = - std::find_if(all_paths.begin(), all_paths.end(), [&](path_info& valid_path) { - return valid_path.full_path == selected_path; - }); - if (found_path != all_paths.end()) { - valid_selected_paths.push_back({selected_path, found_path->schema_idx}); - } - } - - // Now construct paths as vector of strings for further consumption - std::vector> use_names3; - std::transform(valid_selected_paths.begin(), - valid_selected_paths.end(), - std::back_inserter(use_names3), - [&](path_info const& valid_path) { - auto schema_idx = valid_path.schema_idx; - std::vector result_path; - do { - SchemaElement const& elem = get_schema(schema_idx); - result_path.push_back(elem.name); - schema_idx = elem.parent_idx; - } while (schema_idx > 0); - return std::vector(result_path.rbegin(), result_path.rend()); - }); - - std::vector selected_columns; - if (include_index) { - std::vector index_names = get_pandas_index_names(); - std::transform(index_names.cbegin(), - index_names.cend(), - std::back_inserter(selected_columns), - [](std::string const& name) { return column_name_info(name); }); - } - // Merge the vector use_names into a set of hierarchical column_name_info objects - /* This is because if we have columns like this: - * col1 - * / \ - * s3 f4 - * / \ - * f5 f6 - * - * there may be common paths in use_names like: - * {"col1", "s3", "f5"}, {"col1", "f4"} - * which means we want the output to contain - * col1 - * / \ - * s3 f4 - * / - * f5 - * - * rather than - * col1 col1 - * | | - * s3 f4 - * | - * f5 - */ - for (auto const& path : use_names3) { - auto array_to_find_in = &selected_columns; - for (size_t depth = 0; depth < path.size(); ++depth) { - // Check if the path exists in our selected_columns and if not, add it. - auto const& name_to_find = path[depth]; - auto found_col = std::find_if( - array_to_find_in->begin(), - array_to_find_in->end(), - [&name_to_find](column_name_info const& col) { return col.name == name_to_find; }); - if (found_col == array_to_find_in->end()) { - auto& col = array_to_find_in->emplace_back(name_to_find); - array_to_find_in = &col.children; - } else { - // Path exists. go down further. 
- array_to_find_in = &found_col->children; - } - } - } - for (auto& col : selected_columns) { - auto const& top_level_col_schema_idx = find_schema_child(root, col.name); - bool valid_column = build_column(&col, top_level_col_schema_idx, output_columns, false); - if (valid_column) output_column_schemas.push_back(top_level_col_schema_idx); - } - } - - return std::make_tuple( - std::move(input_columns), std::move(output_columns), std::move(output_column_schemas)); - } -}; - /** * @brief Generate depth remappings for repetition and definition levels. * diff --git a/cpp/src/io/parquet/reader_impl_helpers.cu b/cpp/src/io/parquet/reader_impl_helpers.cu new file mode 100644 index 00000000000..69d44014e92 --- /dev/null +++ b/cpp/src/io/parquet/reader_impl_helpers.cu @@ -0,0 +1,708 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// TODO: cleanup +#include "reader_impl.hpp" +#include "reader_impl_helpers.cuh" + +#include "compact_protocol_reader.hpp" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace cudf::io::detail::parquet { +// Import functionality that's independent of legacy code +using namespace cudf::io::parquet; +using namespace cudf::io; + +parquet::ConvertedType logical_type_to_converted_type(parquet::LogicalType const& logical) +{ + if (logical.isset.STRING) { + return parquet::UTF8; + } else if (logical.isset.MAP) { + return parquet::MAP; + } else if (logical.isset.LIST) { + return parquet::LIST; + } else if (logical.isset.ENUM) { + return parquet::ENUM; + } else if (logical.isset.DECIMAL) { + return parquet::DECIMAL; // TODO set decimal values + } else if (logical.isset.DATE) { + return parquet::DATE; + } else if (logical.isset.TIME) { + if (logical.TIME.unit.isset.MILLIS) + return parquet::TIME_MILLIS; + else if (logical.TIME.unit.isset.MICROS) + return parquet::TIME_MICROS; + } else if (logical.isset.TIMESTAMP) { + if (logical.TIMESTAMP.unit.isset.MILLIS) + return parquet::TIMESTAMP_MILLIS; + else if (logical.TIMESTAMP.unit.isset.MICROS) + return parquet::TIMESTAMP_MICROS; + } else if (logical.isset.INTEGER) { + switch (logical.INTEGER.bitWidth) { + case 8: return logical.INTEGER.isSigned ? INT_8 : UINT_8; + case 16: return logical.INTEGER.isSigned ? INT_16 : UINT_16; + case 32: return logical.INTEGER.isSigned ? INT_32 : UINT_32; + case 64: return logical.INTEGER.isSigned ? 
INT_64 : UINT_64; + default: break; + } + } else if (logical.isset.UNKNOWN) { + return parquet::NA; + } else if (logical.isset.JSON) { + return parquet::JSON; + } else if (logical.isset.BSON) { + return parquet::BSON; + } + return parquet::UNKNOWN; +} + +/** + * @brief Function that translates Parquet datatype to cuDF type enum + */ +type_id to_type_id(SchemaElement const& schema, + bool strings_to_categorical, + type_id timestamp_type_id) +{ + parquet::Type const physical = schema.type; + parquet::LogicalType const logical_type = schema.logical_type; + parquet::ConvertedType converted_type = schema.converted_type; + int32_t decimal_scale = schema.decimal_scale; + + // Logical type used for actual data interpretation; the legacy converted type + // is superceded by 'logical' type whenever available. + auto const inferred_converted_type = logical_type_to_converted_type(logical_type); + if (inferred_converted_type != parquet::UNKNOWN) converted_type = inferred_converted_type; + if (inferred_converted_type == parquet::DECIMAL && decimal_scale == 0) + decimal_scale = schema.logical_type.DECIMAL.scale; + + switch (converted_type) { + case parquet::UINT_8: return type_id::UINT8; + case parquet::INT_8: return type_id::INT8; + case parquet::UINT_16: return type_id::UINT16; + case parquet::INT_16: return type_id::INT16; + case parquet::UINT_32: return type_id::UINT32; + case parquet::UINT_64: return type_id::UINT64; + case parquet::DATE: return type_id::TIMESTAMP_DAYS; + case parquet::TIME_MILLIS: + return (timestamp_type_id != type_id::EMPTY) ? timestamp_type_id + : type_id::DURATION_MILLISECONDS; + case parquet::TIME_MICROS: + return (timestamp_type_id != type_id::EMPTY) ? timestamp_type_id + : type_id::DURATION_MICROSECONDS; + case parquet::TIMESTAMP_MILLIS: + return (timestamp_type_id != type_id::EMPTY) ? timestamp_type_id + : type_id::TIMESTAMP_MILLISECONDS; + case parquet::TIMESTAMP_MICROS: + return (timestamp_type_id != type_id::EMPTY) ? timestamp_type_id + : type_id::TIMESTAMP_MICROSECONDS; + case parquet::DECIMAL: + if (physical == parquet::INT32) { return type_id::DECIMAL32; } + if (physical == parquet::INT64) { return type_id::DECIMAL64; } + if (physical == parquet::FIXED_LEN_BYTE_ARRAY) { + if (schema.type_length <= static_cast(sizeof(int32_t))) { + return type_id::DECIMAL32; + } + if (schema.type_length <= static_cast(sizeof(int64_t))) { + return type_id::DECIMAL64; + } + if (schema.type_length <= static_cast(sizeof(__int128_t))) { + return type_id::DECIMAL128; + } + } + CUDF_FAIL("Invalid representation of decimal type"); + break; + + // maps are just List>. + case parquet::MAP: + case parquet::LIST: return type_id::LIST; + case parquet::NA: return type_id::STRING; + // return type_id::EMPTY; //TODO(kn): enable after Null/Empty column support + default: break; + } + + if (inferred_converted_type == parquet::UNKNOWN and physical == parquet::INT64 and + logical_type.TIMESTAMP.unit.isset.NANOS) { + return (timestamp_type_id != type_id::EMPTY) ? timestamp_type_id + : type_id::TIMESTAMP_NANOSECONDS; + } + + // is it simply a struct? + if (schema.is_struct()) { return type_id::STRUCT; } + + // Physical storage type supported by Parquet; controls the on-disk storage + // format in combination with the encoding type. 
+ switch (physical) { + case parquet::BOOLEAN: return type_id::BOOL8; + case parquet::INT32: return type_id::INT32; + case parquet::INT64: return type_id::INT64; + case parquet::FLOAT: return type_id::FLOAT32; + case parquet::DOUBLE: return type_id::FLOAT64; + case parquet::BYTE_ARRAY: + case parquet::FIXED_LEN_BYTE_ARRAY: + // Can be mapped to INT32 (32-bit hash) or STRING + return strings_to_categorical ? type_id::INT32 : type_id::STRING; + case parquet::INT96: + return (timestamp_type_id != type_id::EMPTY) ? timestamp_type_id + : type_id::TIMESTAMP_NANOSECONDS; + default: break; + } + + return type_id::EMPTY; +} + +/** + * @brief Converts cuDF units to Parquet units. + * + * @return A tuple of Parquet type width, Parquet clock rate and Parquet decimal type. + */ +std::tuple conversion_info(type_id column_type_id, + type_id timestamp_type_id, + parquet::Type physical, + int8_t converted, + int32_t length) +{ + int32_t type_width = (physical == parquet::FIXED_LEN_BYTE_ARRAY) ? length : 0; + int32_t clock_rate = 0; + if (column_type_id == type_id::INT8 or column_type_id == type_id::UINT8) { + type_width = 1; // I32 -> I8 + } else if (column_type_id == type_id::INT16 or column_type_id == type_id::UINT16) { + type_width = 2; // I32 -> I16 + } else if (column_type_id == type_id::INT32) { + type_width = 4; // str -> hash32 + } else if (is_chrono(data_type{column_type_id})) { + clock_rate = to_clockrate(timestamp_type_id); + } + + int8_t converted_type = converted; + if (converted_type == parquet::DECIMAL && column_type_id != type_id::FLOAT64 && + not cudf::is_fixed_point(data_type{column_type_id})) { + converted_type = parquet::UNKNOWN; // Not converting to float64 or decimal + } + return std::make_tuple(type_width, clock_rate, converted_type); +} + +std::vector aggregate_reader_metadata::metadatas_from_sources( + std::vector> const& sources) +{ + std::vector metadatas; + std::transform( + sources.cbegin(), sources.cend(), std::back_inserter(metadatas), [](auto const& source) { + return metadata(source.get()); + }); + return metadatas; +} + +/** + * @brief Collect the keyvalue maps from each per-file metadata object into a vector of maps. 
+ */ +std::vector> +aggregate_reader_metadata::collect_keyval_metadata() +{ + std::vector> kv_maps; + std::transform(per_file_metadata.cbegin(), + per_file_metadata.cend(), + std::back_inserter(kv_maps), + [](auto const& pfm) { + std::unordered_map kv_map; + std::transform(pfm.key_value_metadata.cbegin(), + pfm.key_value_metadata.cend(), + std::inserter(kv_map, kv_map.end()), + [](auto const& kv) { + return std::pair{kv.key, kv.value}; + }); + return kv_map; + }); + + return kv_maps; +} + +/** + * @brief Sums up the number of rows of each source + */ +size_type aggregate_reader_metadata::calc_num_rows() const +{ + return std::accumulate( + per_file_metadata.begin(), per_file_metadata.end(), 0, [](auto& sum, auto& pfm) { + return sum + pfm.num_rows; + }); +} + +/** + * @brief Sums up the number of row groups of each source + */ +size_type aggregate_reader_metadata::calc_num_row_groups() const +{ + return std::accumulate( + per_file_metadata.begin(), per_file_metadata.end(), 0, [](auto& sum, auto& pfm) { + return sum + pfm.row_groups.size(); + }); +} + +aggregate_reader_metadata::aggregate_reader_metadata( + std::vector> const& sources) + : per_file_metadata(metadatas_from_sources(sources)), + keyval_maps(collect_keyval_metadata()), + num_rows(calc_num_rows()), + num_row_groups(calc_num_row_groups()) +{ + // Verify that the input files have matching numbers of columns + size_type num_cols = -1; + for (auto const& pfm : per_file_metadata) { + if (pfm.row_groups.size() != 0) { + if (num_cols == -1) + num_cols = pfm.row_groups[0].columns.size(); + else + CUDF_EXPECTS(num_cols == static_cast(pfm.row_groups[0].columns.size()), + "All sources must have the same number of columns"); + } + } + // Verify that the input files have matching schemas + for (auto const& pfm : per_file_metadata) { + CUDF_EXPECTS(per_file_metadata[0].schema == pfm.schema, + "All sources must have the same schemas"); + } +} + +RowGroup const& aggregate_reader_metadata::get_row_group(size_type row_group_index, + size_type src_idx) const +{ + CUDF_EXPECTS(src_idx >= 0 && src_idx < static_cast(per_file_metadata.size()), + "invalid source index"); + return per_file_metadata[src_idx].row_groups[row_group_index]; +} + +ColumnChunkMetaData const& aggregate_reader_metadata::get_column_metadata(size_type row_group_index, + size_type src_idx, + int schema_idx) const +{ + auto col = std::find_if( + per_file_metadata[src_idx].row_groups[row_group_index].columns.begin(), + per_file_metadata[src_idx].row_groups[row_group_index].columns.end(), + [schema_idx](ColumnChunk const& col) { return col.schema_idx == schema_idx ? true : false; }); + CUDF_EXPECTS(col != std::end(per_file_metadata[src_idx].row_groups[row_group_index].columns), + "Found no metadata for schema index"); + return col->meta_data; +} + +/** + * @brief Extracts the pandas "index_columns" section + * + * PANDAS adds its own metadata to the key_value section when writing out the + * dataframe to a file to aid in exact reconstruction. The JSON-formatted + * metadata contains the index column(s) and PANDA-specific datatypes. 
+ * + * @return comma-separated index column names in quotes + */ +std::string aggregate_reader_metadata::get_pandas_index() const +{ + // Assumes that all input files have the same metadata + // TODO: verify this assumption + auto it = keyval_maps[0].find("pandas"); + if (it != keyval_maps[0].end()) { + // Captures a list of quoted strings found inside square brackets after `"index_columns":` + // Inside quotes supports newlines, brackets, escaped quotes, etc. + // One-liner regex: + // "index_columns"\s*:\s*\[\s*((?:"(?:|(?:.*?(?![^\\]")).?)[^\\]?",?\s*)*)\] + // Documented below. + std::regex index_columns_expr{ + R"("index_columns"\s*:\s*\[\s*)" // match preamble, opening square bracket, whitespace + R"(()" // Open first capturing group + R"((?:")" // Open non-capturing group match opening quote + R"((?:|(?:.*?(?![^\\]")).?))" // match empty string or anything between quotes + R"([^\\]?")" // Match closing non-escaped quote + R"(,?\s*)" // Match optional comma and whitespace + R"()*)" // Close non-capturing group and repeat 0 or more times + R"())" // Close first capturing group + R"(\])" // Match closing square brackets + }; + std::smatch sm; + if (std::regex_search(it->second, sm, index_columns_expr)) { return sm[1].str(); } + } + return ""; +} + +/** + * @brief Extracts the column name(s) used for the row indexes in a dataframe + * + * @param names List of column names to load, where index column name(s) will be added + */ +std::vector aggregate_reader_metadata::get_pandas_index_names() const +{ + std::vector names; + auto str = get_pandas_index(); + if (str.length() != 0) { + std::regex index_name_expr{R"(\"((?:\\.|[^\"])*)\")"}; + std::smatch sm; + while (std::regex_search(str, sm, index_name_expr)) { + if (sm.size() == 2) { // 2 = whole match, first item + if (std::find(names.begin(), names.end(), sm[1].str()) == names.end()) { + std::regex esc_quote{R"(\\")"}; + names.emplace_back(std::regex_replace(sm[1].str(), esc_quote, R"(")")); + } + } + str = sm.suffix(); + } + } + return names; +} + +/** + * @brief Filters and reduces down to a selection of row groups + * + * @param row_groups Lists of row groups to read, one per source + * @param row_start Starting row of the selection + * @param row_count Total number of rows selected + * + * @return List of row group indexes and its starting row + */ +std::vector aggregate_reader_metadata::select_row_groups( + std::vector> const& row_groups, + size_type& row_start, + size_type& row_count) const +{ + if (!row_groups.empty()) { + std::vector selection; + CUDF_EXPECTS(row_groups.size() == per_file_metadata.size(), + "Must specify row groups for each source"); + + row_count = 0; + for (size_t src_idx = 0; src_idx < row_groups.size(); ++src_idx) { + for (auto const& rowgroup_idx : row_groups[src_idx]) { + CUDF_EXPECTS( + rowgroup_idx >= 0 && + rowgroup_idx < static_cast(per_file_metadata[src_idx].row_groups.size()), + "Invalid rowgroup index"); + selection.emplace_back(rowgroup_idx, row_count, src_idx); + row_count += get_row_group(rowgroup_idx, src_idx).num_rows; + } + } + return selection; + } + + row_start = std::max(row_start, 0); + if (row_count < 0) { + row_count = static_cast( + std::min(get_num_rows(), std::numeric_limits::max())); + } + row_count = min(row_count, get_num_rows() - row_start); + CUDF_EXPECTS(row_count >= 0, "Invalid row count"); + CUDF_EXPECTS(row_start <= get_num_rows(), "Invalid row start"); + + std::vector selection; + size_type count = 0; + for (size_t src_idx = 0; src_idx < per_file_metadata.size(); ++src_idx) { 
+ for (size_t rg_idx = 0; rg_idx < per_file_metadata[src_idx].row_groups.size(); ++rg_idx) { + auto const chunk_start_row = count; + count += get_row_group(rg_idx, src_idx).num_rows; + if (count > row_start || count == 0) { + selection.emplace_back(rg_idx, chunk_start_row, src_idx); + } + if (count >= row_start + row_count) { break; } + } + } + + return selection; +} + +/** + * @brief Filters and reduces down to a selection of columns + * + * @param use_names List of paths of column names to select; `nullopt` if user did not select + * columns to read + * @param include_index Whether to always include the PANDAS index column(s) + * @param strings_to_categorical Type conversion parameter + * @param timestamp_type_id Type conversion parameter + * + * @return input column information, output column information, list of output column schema + * indices + */ +std::tuple, std::vector, std::vector> +aggregate_reader_metadata::select_columns(std::optional> const& use_names, + bool include_index, + bool strings_to_categorical, + type_id timestamp_type_id) const +{ + auto find_schema_child = [&](SchemaElement const& schema_elem, std::string const& name) { + auto const& col_schema_idx = + std::find_if(schema_elem.children_idx.cbegin(), + schema_elem.children_idx.cend(), + [&](size_t col_schema_idx) { return get_schema(col_schema_idx).name == name; }); + + return (col_schema_idx != schema_elem.children_idx.end()) ? static_cast(*col_schema_idx) + : -1; + }; + + std::vector output_columns; + std::vector input_columns; + std::vector nesting; + + // Return true if column path is valid. e.g. if the path is {"struct1", "child1"}, then it is + // valid if "struct1.child1" exists in this file's schema. If "struct1" exists but "child1" is + // not a child of "struct1" then the function will return false for "struct1" + std::function&, bool)> + build_column = [&](column_name_info const* col_name_info, + int schema_idx, + std::vector& out_col_array, + bool has_list_parent) { + if (schema_idx < 0) { return false; } + auto const& schema_elem = get_schema(schema_idx); + + // if schema_elem is a stub then it does not exist in the column_name_info and column_buffer + // hierarchy. So continue on + if (schema_elem.is_stub()) { + // is this legit? + CUDF_EXPECTS(schema_elem.num_children == 1, "Unexpected number of children for stub"); + auto child_col_name_info = (col_name_info) ? &col_name_info->children[0] : nullptr; + return build_column( + child_col_name_info, schema_elem.children_idx[0], out_col_array, has_list_parent); + } + + // if we're at the root, this is a new output column + auto const col_type = schema_elem.is_one_level_list() + ? type_id::LIST + : to_type_id(schema_elem, strings_to_categorical, timestamp_type_id); + auto const dtype = to_data_type(col_type, schema_elem); + + column_buffer output_col(dtype, schema_elem.repetition_type == OPTIONAL); + if (has_list_parent) { output_col.user_data |= PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT; } + // store the index of this element if inserted in out_col_array + nesting.push_back(static_cast(out_col_array.size())); + output_col.name = schema_elem.name; + + // build each child + bool path_is_valid = false; + if (col_name_info == nullptr or col_name_info->children.empty()) { + // add all children of schema_elem. 
+ // At this point, we can no longer pass a col_name_info to build_column + for (int idx = 0; idx < schema_elem.num_children; idx++) { + path_is_valid |= build_column(nullptr, + schema_elem.children_idx[idx], + output_col.children, + has_list_parent || col_type == type_id::LIST); + } + } else { + for (size_t idx = 0; idx < col_name_info->children.size(); idx++) { + path_is_valid |= + build_column(&col_name_info->children[idx], + find_schema_child(schema_elem, col_name_info->children[idx].name), + output_col.children, + has_list_parent || col_type == type_id::LIST); + } + } + + // if I have no children, we're at a leaf and I'm an input column (that is, one with actual + // data stored) so add me to the list. + if (schema_elem.num_children == 0) { + input_column_info& input_col = input_columns.emplace_back( + input_column_info{schema_idx, schema_elem.name, schema_elem.max_repetition_level > 0}); + + // set up child output column for one-level encoding list + if (schema_elem.is_one_level_list()) { + // determine the element data type + auto const element_type = + to_type_id(schema_elem, strings_to_categorical, timestamp_type_id); + auto const element_dtype = to_data_type(element_type, schema_elem); + + column_buffer element_col(element_dtype, schema_elem.repetition_type == OPTIONAL); + if (has_list_parent || col_type == type_id::LIST) { + element_col.user_data |= PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT; + } + // store the index of this element + nesting.push_back(static_cast(output_col.children.size())); + // TODO: not sure if we should assign a name or leave it blank + element_col.name = "element"; + + output_col.children.push_back(std::move(element_col)); + } + + std::copy(nesting.cbegin(), nesting.cend(), std::back_inserter(input_col.nesting)); + + // pop off the extra nesting element. + if (schema_elem.is_one_level_list()) { nesting.pop_back(); } + + path_is_valid = true; // If we're able to reach leaf then path is valid + } + + if (path_is_valid) { out_col_array.push_back(std::move(output_col)); } + + nesting.pop_back(); + return path_is_valid; + }; + + std::vector output_column_schemas; + + // + // there is not necessarily a 1:1 mapping between input columns and output columns. + // For example, parquet does not explicitly store a ColumnChunkDesc for struct columns. + // The "structiness" is simply implied by the schema. For example, this schema: + // required group field_id=1 name { + // required binary field_id=2 firstname (String); + // required binary field_id=3 middlename (String); + // required binary field_id=4 lastname (String); + // } + // will only contain 3 internal columns of data (firstname, middlename, lastname). But of + // course "name" is ultimately the struct column we want to return. + // + // "firstname", "middlename" and "lastname" represent the input columns in the file that we + // process to produce the final cudf "name" column. + // + // A user can ask for a single field out of the struct e.g. firstname. 
+ // In this case they'll pass a fully qualified name to the schema element like + // ["name", "firstname"] + // + auto const& root = get_schema(0); + if (not use_names.has_value()) { + for (auto const& schema_idx : root.children_idx) { + build_column(nullptr, schema_idx, output_columns, false); + output_column_schemas.push_back(schema_idx); + } + } else { + struct path_info { + std::string full_path; + int schema_idx; + }; + + // Convert schema into a vector of every possible path + std::vector all_paths; + std::function add_path = [&](std::string path_till_now, + int schema_idx) { + auto const& schema_elem = get_schema(schema_idx); + std::string curr_path = path_till_now + schema_elem.name; + all_paths.push_back({curr_path, schema_idx}); + for (auto const& child_idx : schema_elem.children_idx) { + add_path(curr_path + ".", child_idx); + } + }; + for (auto const& child_idx : get_schema(0).children_idx) { + add_path("", child_idx); + } + + // Find which of the selected paths are valid and get their schema index + std::vector valid_selected_paths; + for (auto const& selected_path : *use_names) { + auto found_path = + std::find_if(all_paths.begin(), all_paths.end(), [&](path_info& valid_path) { + return valid_path.full_path == selected_path; + }); + if (found_path != all_paths.end()) { + valid_selected_paths.push_back({selected_path, found_path->schema_idx}); + } + } + + // Now construct paths as vector of strings for further consumption + std::vector> use_names3; + std::transform(valid_selected_paths.begin(), + valid_selected_paths.end(), + std::back_inserter(use_names3), + [&](path_info const& valid_path) { + auto schema_idx = valid_path.schema_idx; + std::vector result_path; + do { + SchemaElement const& elem = get_schema(schema_idx); + result_path.push_back(elem.name); + schema_idx = elem.parent_idx; + } while (schema_idx > 0); + return std::vector(result_path.rbegin(), result_path.rend()); + }); + + std::vector selected_columns; + if (include_index) { + std::vector index_names = get_pandas_index_names(); + std::transform(index_names.cbegin(), + index_names.cend(), + std::back_inserter(selected_columns), + [](std::string const& name) { return column_name_info(name); }); + } + // Merge the vector use_names into a set of hierarchical column_name_info objects + /* This is because if we have columns like this: + * col1 + * / \ + * s3 f4 + * / \ + * f5 f6 + * + * there may be common paths in use_names like: + * {"col1", "s3", "f5"}, {"col1", "f4"} + * which means we want the output to contain + * col1 + * / \ + * s3 f4 + * / + * f5 + * + * rather than + * col1 col1 + * | | + * s3 f4 + * | + * f5 + */ + for (auto const& path : use_names3) { + auto array_to_find_in = &selected_columns; + for (size_t depth = 0; depth < path.size(); ++depth) { + // Check if the path exists in our selected_columns and if not, add it. + auto const& name_to_find = path[depth]; + auto found_col = std::find_if( + array_to_find_in->begin(), + array_to_find_in->end(), + [&name_to_find](column_name_info const& col) { return col.name == name_to_find; }); + if (found_col == array_to_find_in->end()) { + auto& col = array_to_find_in->emplace_back(name_to_find); + array_to_find_in = &col.children; + } else { + // Path exists. go down further. 
+ array_to_find_in = &found_col->children; + } + } + } + for (auto& col : selected_columns) { + auto const& top_level_col_schema_idx = find_schema_child(root, col.name); + bool valid_column = build_column(&col, top_level_col_schema_idx, output_columns, false); + if (valid_column) output_column_schemas.push_back(top_level_col_schema_idx); + } + } + + return std::make_tuple( + std::move(input_columns), std::move(output_columns), std::move(output_column_schemas)); +} + +} // namespace cudf::io::detail::parquet diff --git a/cpp/src/io/parquet/reader_impl_helpers.cuh b/cpp/src/io/parquet/reader_impl_helpers.cuh new file mode 100644 index 00000000000..43906c52732 --- /dev/null +++ b/cpp/src/io/parquet/reader_impl_helpers.cuh @@ -0,0 +1,264 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// TODO: cleanup +#include "reader_impl.hpp" + +#include "compact_protocol_reader.hpp" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace cudf::io::detail::parquet { +// Import functionality that's independent of legacy code +using namespace cudf::io::parquet; +using namespace cudf::io; + +parquet::ConvertedType logical_type_to_converted_type(parquet::LogicalType const& logical); + +/** + * @brief Function that translates Parquet datatype to cuDF type enum + */ +type_id to_type_id(SchemaElement const& schema, + bool strings_to_categorical, + type_id timestamp_type_id); + +/** + * @brief Converts cuDF type enum to column logical type + */ +inline data_type to_data_type(type_id t_id, SchemaElement const& schema) +{ + return t_id == type_id::DECIMAL32 || t_id == type_id::DECIMAL64 || t_id == type_id::DECIMAL128 + ? data_type{t_id, numeric::scale_type{-schema.decimal_scale}} + : data_type{t_id}; +} + +/** + * @brief Function that returns the required the number of bits to store a value + */ +template +T required_bits(uint32_t max_level) +{ + return static_cast(CompactProtocolReader::NumRequiredBits(max_level)); +} + +/** + * @brief Converts cuDF units to Parquet units. + * + * @return A tuple of Parquet type width, Parquet clock rate and Parquet decimal type. 
+ */ +std::tuple conversion_info(type_id column_type_id, + type_id timestamp_type_id, + parquet::Type physical, + int8_t converted, + int32_t length); + +inline void decompress_check(device_span results, + rmm::cuda_stream_view stream) +{ + CUDF_EXPECTS(thrust::all_of(rmm::exec_policy(stream), + results.begin(), + results.end(), + [] __device__(auto const& res) { + return res.status == compression_status::SUCCESS; + }), + "Error during decompression"); +} + +/** + * @brief Class for parsing dataset metadata + */ +struct metadata : public FileMetaData { + explicit metadata(datasource* source) + { + constexpr auto header_len = sizeof(file_header_s); + constexpr auto ender_len = sizeof(file_ender_s); + + const auto len = source->size(); + const auto header_buffer = source->host_read(0, header_len); + const auto header = reinterpret_cast(header_buffer->data()); + const auto ender_buffer = source->host_read(len - ender_len, ender_len); + const auto ender = reinterpret_cast(ender_buffer->data()); + CUDF_EXPECTS(len > header_len + ender_len, "Incorrect data source"); + CUDF_EXPECTS(header->magic == parquet_magic && ender->magic == parquet_magic, + "Corrupted header or footer"); + CUDF_EXPECTS(ender->footer_len != 0 && ender->footer_len <= (len - header_len - ender_len), + "Incorrect footer length"); + + const auto buffer = source->host_read(len - ender->footer_len - ender_len, ender->footer_len); + CompactProtocolReader cp(buffer->data(), ender->footer_len); + CUDF_EXPECTS(cp.read(this), "Cannot parse metadata"); + CUDF_EXPECTS(cp.InitSchema(this), "Cannot initialize schema"); + } +}; + +class aggregate_reader_metadata { + std::vector per_file_metadata; + std::vector> keyval_maps; + size_type num_rows; + size_type num_row_groups; + /** + * @brief Create a metadata object from each element in the source vector + */ + std::vector metadatas_from_sources( + std::vector> const& sources); + + /** + * @brief Collect the keyvalue maps from each per-file metadata object into a vector of maps. 
+ */ + [[nodiscard]] std::vector> collect_keyval_metadata(); + + /** + * @brief Sums up the number of rows of each source + */ + [[nodiscard]] size_type calc_num_rows() const; + + /** + * @brief Sums up the number of row groups of each source + */ + [[nodiscard]] size_type calc_num_row_groups() const; + + public: + aggregate_reader_metadata(std::vector> const& sources); + + [[nodiscard]] RowGroup const& get_row_group(size_type row_group_index, size_type src_idx) const; + + [[nodiscard]] ColumnChunkMetaData const& get_column_metadata(size_type row_group_index, + size_type src_idx, + int schema_idx) const; + + [[nodiscard]] auto get_num_rows() const { return num_rows; } + + [[nodiscard]] auto get_num_row_groups() const { return num_row_groups; } + + [[nodiscard]] auto const& get_schema(int schema_idx) const + { + return per_file_metadata[0].schema[schema_idx]; + } + + [[nodiscard]] auto const& get_key_value_metadata() const { return keyval_maps; } + + /** + * @brief Gets the concrete nesting depth of output cudf columns + * + * @param schema_index Schema index of the input column + * + * @return comma-separated index column names in quotes + */ + [[nodiscard]] inline int get_output_nesting_depth(int schema_index) const + { + auto& pfm = per_file_metadata[0]; + int depth = 0; + + // walk upwards, skipping repeated fields + while (schema_index > 0) { + if (!pfm.schema[schema_index].is_stub()) { depth++; } + // schema of one-level encoding list doesn't contain nesting information, so we need to + // manually add an extra nesting level + if (pfm.schema[schema_index].is_one_level_list()) { depth++; } + schema_index = pfm.schema[schema_index].parent_idx; + } + return depth; + } + + /** + * @brief Extracts the pandas "index_columns" section + * + * PANDAS adds its own metadata to the key_value section when writing out the + * dataframe to a file to aid in exact reconstruction. The JSON-formatted + * metadata contains the index column(s) and PANDA-specific datatypes. 
+ * + * @return comma-separated index column names in quotes + */ + [[nodiscard]] std::string get_pandas_index() const; + + /** + * @brief Extracts the column name(s) used for the row indexes in a dataframe + * + * @param names List of column names to load, where index column name(s) will be added + */ + [[nodiscard]] std::vector get_pandas_index_names() const; + + struct row_group_info { + size_type const index; + size_t const start_row; // TODO source index + size_type const source_index; + row_group_info(size_type index, size_t start_row, size_type source_index) + : index(index), start_row(start_row), source_index(source_index) + { + } + }; + + /** + * @brief Filters and reduces down to a selection of row groups + * + * @param row_groups Lists of row groups to read, one per source + * @param row_start Starting row of the selection + * @param row_count Total number of rows selected + * + * @return List of row group indexes and its starting row + */ + [[nodiscard]] std::vector select_row_groups( + std::vector> const& row_groups, + size_type& row_start, + size_type& row_count) const; + + /** + * @brief Filters and reduces down to a selection of columns + * + * @param use_names List of paths of column names to select; `nullopt` if user did not select + * columns to read + * @param include_index Whether to always include the PANDAS index column(s) + * @param strings_to_categorical Type conversion parameter + * @param timestamp_type_id Type conversion parameter + * + * @return input column information, output column information, list of output column schema + * indices + */ + [[nodiscard]] std:: + tuple, std::vector, std::vector> + select_columns(std::optional> const& use_names, + bool include_index, + bool strings_to_categorical, + type_id timestamp_type_id) const; +}; + +} // namespace cudf::io::detail::parquet diff --git a/cpp/src/io/parquet/reader_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu similarity index 99% rename from cpp/src/io/parquet/reader_preprocess.cu rename to cpp/src/io/parquet/reader_impl_preprocess.cu index 29c3f7635af..f1d6fa203cd 100644 --- a/cpp/src/io/parquet/reader_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -15,6 +15,7 @@ */ #include "reader_impl.hpp" +#include "reader_impl_helpers.cuh" #include #include From 6569d62247ae7a0ac1df98cce562f2facd347575 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 24 Oct 2022 16:31:30 -0700 Subject: [PATCH 089/162] Cleanup Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.cu | 36 ++++---- cpp/src/io/parquet/reader_impl_helpers.cu | 95 +++++++------------- cpp/src/io/parquet/reader_impl_helpers.cuh | 93 +++---------------- cpp/src/io/parquet/reader_impl_preprocess.cu | 55 ++++++++++-- cpp/tests/CMakeLists.txt | 1 + 5 files changed, 107 insertions(+), 173 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 5229c0dc58b..fa49e60b742 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -22,18 +22,10 @@ #include "reader_impl.hpp" #include "reader_impl_helpers.cuh" -#include "compact_protocol_reader.hpp" - -#include #include #include -#include -#include #include -#include -#include -#include #include #include @@ -41,24 +33,28 @@ #include #include -#include -#include #include -#include -#include - -#include -#include -#include -#include namespace cudf { namespace io { namespace detail { namespace parquet { -// Import functionality that's independent of legacy code -using namespace 
cudf::io::parquet; -using namespace cudf::io; + +namespace { + +inline void decompress_check(device_span results, + rmm::cuda_stream_view stream) +{ + CUDF_EXPECTS(thrust::all_of(rmm::exec_policy(stream), + results.begin(), + results.end(), + [] __device__(auto const& res) { + return res.status == compression_status::SUCCESS; + }), + "Error during decompression"); +} + +} // namespace /** * @brief Generate depth remappings for repetition and definition levels. diff --git a/cpp/src/io/parquet/reader_impl_helpers.cu b/cpp/src/io/parquet/reader_impl_helpers.cu index 69d44014e92..9d155792a30 100644 --- a/cpp/src/io/parquet/reader_impl_helpers.cu +++ b/cpp/src/io/parquet/reader_impl_helpers.cu @@ -14,46 +14,17 @@ * limitations under the License. */ -// TODO: cleanup -#include "reader_impl.hpp" #include "reader_impl_helpers.cuh" -#include "compact_protocol_reader.hpp" - -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include -#include -#include +#include + #include namespace cudf::io::detail::parquet { -// Import functionality that's independent of legacy code -using namespace cudf::io::parquet; -using namespace cudf::io; -parquet::ConvertedType logical_type_to_converted_type(parquet::LogicalType const& logical) +namespace { + +ConvertedType logical_type_to_converted_type(LogicalType const& logical) { if (logical.isset.STRING) { return parquet::UTF8; @@ -95,6 +66,8 @@ parquet::ConvertedType logical_type_to_converted_type(parquet::LogicalType const return parquet::UNKNOWN; } +} // namespace + /** * @brief Function that translates Parquet datatype to cuDF type enum */ @@ -189,35 +162,26 @@ type_id to_type_id(SchemaElement const& schema, return type_id::EMPTY; } -/** - * @brief Converts cuDF units to Parquet units. - * - * @return A tuple of Parquet type width, Parquet clock rate and Parquet decimal type. - */ -std::tuple conversion_info(type_id column_type_id, - type_id timestamp_type_id, - parquet::Type physical, - int8_t converted, - int32_t length) +metadata::metadata(datasource* source) { - int32_t type_width = (physical == parquet::FIXED_LEN_BYTE_ARRAY) ? 
length : 0; - int32_t clock_rate = 0; - if (column_type_id == type_id::INT8 or column_type_id == type_id::UINT8) { - type_width = 1; // I32 -> I8 - } else if (column_type_id == type_id::INT16 or column_type_id == type_id::UINT16) { - type_width = 2; // I32 -> I16 - } else if (column_type_id == type_id::INT32) { - type_width = 4; // str -> hash32 - } else if (is_chrono(data_type{column_type_id})) { - clock_rate = to_clockrate(timestamp_type_id); - } - - int8_t converted_type = converted; - if (converted_type == parquet::DECIMAL && column_type_id != type_id::FLOAT64 && - not cudf::is_fixed_point(data_type{column_type_id})) { - converted_type = parquet::UNKNOWN; // Not converting to float64 or decimal - } - return std::make_tuple(type_width, clock_rate, converted_type); + constexpr auto header_len = sizeof(file_header_s); + constexpr auto ender_len = sizeof(file_ender_s); + + const auto len = source->size(); + const auto header_buffer = source->host_read(0, header_len); + const auto header = reinterpret_cast(header_buffer->data()); + const auto ender_buffer = source->host_read(len - ender_len, ender_len); + const auto ender = reinterpret_cast(ender_buffer->data()); + CUDF_EXPECTS(len > header_len + ender_len, "Incorrect data source"); + CUDF_EXPECTS(header->magic == parquet_magic && ender->magic == parquet_magic, + "Corrupted header or footer"); + CUDF_EXPECTS(ender->footer_len != 0 && ender->footer_len <= (len - header_len - ender_len), + "Incorrect footer length"); + + const auto buffer = source->host_read(len - ender->footer_len - ender_len, ender->footer_len); + CompactProtocolReader cp(buffer->data(), ender->footer_len); + CUDF_EXPECTS(cp.read(this), "Cannot parse metadata"); + CUDF_EXPECTS(cp.InitSchema(this), "Cannot initialize schema"); } std::vector aggregate_reader_metadata::metadatas_from_sources( @@ -455,7 +419,7 @@ std::vector aggregate_reader_metadata * @return input column information, output column information, list of output column schema * indices */ -std::tuple, std::vector, std::vector> +std::tuple, std::vector, std::vector> aggregate_reader_metadata::select_columns(std::optional> const& use_names, bool include_index, bool strings_to_categorical, @@ -467,8 +431,9 @@ aggregate_reader_metadata::select_columns(std::optional schema_elem.children_idx.cend(), [&](size_t col_schema_idx) { return get_schema(col_schema_idx).name == name; }); - return (col_schema_idx != schema_elem.children_idx.end()) ? static_cast(*col_schema_idx) - : -1; + return (col_schema_idx != schema_elem.children_idx.end()) + ? static_cast(*col_schema_idx) + : -1; }; std::vector output_columns; diff --git a/cpp/src/io/parquet/reader_impl_helpers.cuh b/cpp/src/io/parquet/reader_impl_helpers.cuh index 43906c52732..76336602a89 100644 --- a/cpp/src/io/parquet/reader_impl_helpers.cuh +++ b/cpp/src/io/parquet/reader_impl_helpers.cuh @@ -14,45 +14,24 @@ * limitations under the License. 
*/ -// TODO: cleanup -#include "reader_impl.hpp" +#pragma once #include "compact_protocol_reader.hpp" +#include "parquet_gpu.hpp" #include -#include -#include -#include -#include -#include -#include +#include +#include #include -#include +#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include +#include +#include namespace cudf::io::detail::parquet { -// Import functionality that's independent of legacy code -using namespace cudf::io::parquet; -using namespace cudf::io; -parquet::ConvertedType logical_type_to_converted_type(parquet::LogicalType const& logical); +using namespace cudf::io::parquet; /** * @brief Function that translates Parquet datatype to cuDF type enum @@ -71,63 +50,11 @@ inline data_type to_data_type(type_id t_id, SchemaElement const& schema) : data_type{t_id}; } -/** - * @brief Function that returns the required the number of bits to store a value - */ -template -T required_bits(uint32_t max_level) -{ - return static_cast(CompactProtocolReader::NumRequiredBits(max_level)); -} - -/** - * @brief Converts cuDF units to Parquet units. - * - * @return A tuple of Parquet type width, Parquet clock rate and Parquet decimal type. - */ -std::tuple conversion_info(type_id column_type_id, - type_id timestamp_type_id, - parquet::Type physical, - int8_t converted, - int32_t length); - -inline void decompress_check(device_span results, - rmm::cuda_stream_view stream) -{ - CUDF_EXPECTS(thrust::all_of(rmm::exec_policy(stream), - results.begin(), - results.end(), - [] __device__(auto const& res) { - return res.status == compression_status::SUCCESS; - }), - "Error during decompression"); -} - /** * @brief Class for parsing dataset metadata */ struct metadata : public FileMetaData { - explicit metadata(datasource* source) - { - constexpr auto header_len = sizeof(file_header_s); - constexpr auto ender_len = sizeof(file_ender_s); - - const auto len = source->size(); - const auto header_buffer = source->host_read(0, header_len); - const auto header = reinterpret_cast(header_buffer->data()); - const auto ender_buffer = source->host_read(len - ender_len, ender_len); - const auto ender = reinterpret_cast(ender_buffer->data()); - CUDF_EXPECTS(len > header_len + ender_len, "Incorrect data source"); - CUDF_EXPECTS(header->magic == parquet_magic && ender->magic == parquet_magic, - "Corrupted header or footer"); - CUDF_EXPECTS(ender->footer_len != 0 && ender->footer_len <= (len - header_len - ender_len), - "Incorrect footer length"); - - const auto buffer = source->host_read(len - ender->footer_len - ender_len, ender->footer_len); - CompactProtocolReader cp(buffer->data(), ender->footer_len); - CUDF_EXPECTS(cp.read(this), "Cannot parse metadata"); - CUDF_EXPECTS(cp.InitSchema(this), "Cannot initialize schema"); - } + explicit metadata(datasource* source); }; class aggregate_reader_metadata { @@ -254,7 +181,7 @@ class aggregate_reader_metadata { * indices */ [[nodiscard]] std:: - tuple, std::vector, std::vector> + tuple, std::vector, std::vector> select_columns(std::optional> const& use_names, bool include_index, bool strings_to_categorical, diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index f1d6fa203cd..0a3e6ed86a8 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -17,6 +17,8 @@ #include "reader_impl.hpp" #include "reader_impl_helpers.cuh" +#include + #include #include #include @@ -25,14 
+27,59 @@ #include #include +#include +#include #include +#include +#include +#include +#include #include +#include namespace cudf::io::detail::parquet { -// Import functionality that's independent of legacy code -using namespace cudf::io::parquet; -using namespace cudf::io; +namespace { + +/** + * @brief Function that returns the required the number of bits to store a value + */ +template +T required_bits(uint32_t max_level) +{ + return static_cast(CompactProtocolReader::NumRequiredBits(max_level)); +} + +/** + * @brief Converts cuDF units to Parquet units. + * + * @return A tuple of Parquet type width, Parquet clock rate and Parquet decimal type. + */ +std::tuple conversion_info(type_id column_type_id, + type_id timestamp_type_id, + parquet::Type physical, + int8_t converted, + int32_t length) +{ + int32_t type_width = (physical == parquet::FIXED_LEN_BYTE_ARRAY) ? length : 0; + int32_t clock_rate = 0; + if (column_type_id == type_id::INT8 or column_type_id == type_id::UINT8) { + type_width = 1; // I32 -> I8 + } else if (column_type_id == type_id::INT16 or column_type_id == type_id::UINT16) { + type_width = 2; // I32 -> I16 + } else if (column_type_id == type_id::INT32) { + type_width = 4; // str -> hash32 + } else if (is_chrono(data_type{column_type_id})) { + clock_rate = to_clockrate(timestamp_type_id); + } + + int8_t converted_type = converted; + if (converted_type == parquet::DECIMAL && column_type_id != type_id::FLOAT64 && + not cudf::is_fixed_point(data_type{column_type_id})) { + converted_type = parquet::UNKNOWN; // Not converting to float64 or decimal + } + return std::make_tuple(type_width, clock_rate, converted_type); +} #if defined(PREPROCESS_DEBUG) void print_pages(hostdevice_vector& pages, rmm::cuda_stream_view _stream) @@ -68,8 +115,6 @@ void print_chunks(hostdevice_vector& chunks, rmm::cuda_str } #endif // PREPROCESS_DEBUG -namespace { - struct cumulative_row_info { size_t row_count; // cumulative row count size_t size_bytes; // cumulative size in bytes diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 70eaf055bfe..85b8f90b8b9 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -224,6 +224,7 @@ ConfigureTest(CSV_TEST io/csv_test.cpp) ConfigureTest(FILE_IO_TEST io/file_io_test.cpp) ConfigureTest(ORC_TEST io/orc_test.cpp) ConfigureTest(PARQUET_TEST io/parquet_test.cpp io/parquet_chunked_reader_test.cpp) +#ConfigureTest(PARQUET_TEST io/parquet_chunked_reader_test.cpp) ConfigureTest(JSON_TEST io/json_test.cpp) ConfigureTest(JSON_TYPE_CAST_TEST io/json_type_cast_test.cu) ConfigureTest(NESTED_JSON_TEST io/nested_json_test.cpp io/json_tree.cpp) From c3e1d5382eeab38627e414a91638a598e6ab0a99 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 24 Oct 2022 21:31:26 -0700 Subject: [PATCH 090/162] Cleanup Signed-off-by: Nghia Truong --- java/src/main/native/src/ChunkedReaderJni.cpp | 65 +++++-------------- 1 file changed, 16 insertions(+), 49 deletions(-) diff --git a/java/src/main/native/src/ChunkedReaderJni.cpp b/java/src/main/native/src/ChunkedReaderJni.cpp index aaddf6c743a..60da2a2cd53 100644 --- a/java/src/main/native/src/ChunkedReaderJni.cpp +++ b/java/src/main/native/src/ChunkedReaderJni.cpp @@ -14,54 +14,21 @@ * limitations under the License. 
*/ -//=================================================================== -// -// TODO: cleanup header +#include +#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include #include "../include/jni_utils.hpp" #include "cudf_jni_apis.hpp" -#include "dtype_utils.hpp" -#include "jni_compiled_expr.hpp" -#include "row_conversion.hpp" -// TODO: cleanup this -namespace cudf::jni { -jlongArray convert_table_for_return(JNIEnv *env, std::unique_ptr &&table_result, +// This function is defined in `TableJni.cpp`. +jlongArray +cudf::jni::convert_table_for_return(JNIEnv *env, std::unique_ptr &&table_result, std::vector> &&extra_columns); -} -using cudf::jni::release_as_jlong; // This file is for the code releated to chunked reader (Parquet, ORC, etc.). @@ -70,30 +37,30 @@ extern "C" { // This function should take all the parameters that `Table.readParquet` takes, // plus one more parameter `long chunkSizeByteLimit`. JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ParquetChunkedReader_create( - JNIEnv *env, jclass, jlong chunk_size_byte_limit, jobjectArray filter_col_names, - jbooleanArray j_col_binary_read, jstring inputfilepath, jlong buffer, jlong buffer_length, + JNIEnv *env, jclass, jlong chunk_read_limit, jobjectArray filter_col_names, + jbooleanArray j_col_binary_read, jstring inp_file_path, jlong buffer, jlong buffer_length, jint unit) { - JNI_NULL_CHECK(env, j_col_binary_read, "null col_binary_read", 0); + JNI_NULL_CHECK(env, j_col_binary_read, "Null col_binary_read", 0); bool read_buffer = true; if (buffer == 0) { - JNI_NULL_CHECK(env, inputfilepath, "input file or buffer must be supplied", 0); + JNI_NULL_CHECK(env, inp_file_path, "Input file or buffer must be supplied", 0); read_buffer = false; - } else if (inputfilepath != nullptr) { + } else if (inp_file_path != nullptr) { JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", - "cannot pass in both a buffer and an inputfilepath", 0); + "Cannot pass in both a buffer and an inp_file_path", 0); } else if (buffer_length <= 0) { JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", "An empty buffer is not supported", 0); } try { cudf::jni::auto_set_device(env); - cudf::jni::native_jstring filename(env, inputfilepath); + cudf::jni::native_jstring filename(env, inp_file_path); if (!read_buffer && filename.is_empty()) { - JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", "inputfilepath can't be empty", 0); + JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", "inp_file_path can't be empty", 0); } cudf::jni::native_jstringArray n_filter_col_names(env, filter_col_names); - cudf::jni::native_jbooleanArray n_col_binary_read(env, j_col_binary_read); + cudf::jni::native_jbooleanArray n_col_binary_read(env, j_col_binary_read); /// << TODO auto const source = read_buffer ? 
cudf::io::source_info(reinterpret_cast(buffer), @@ -109,7 +76,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ParquetChunkedReader_create( .build(); return reinterpret_cast(new cudf::io::chunked_parquet_reader( - static_cast(chunk_size_byte_limit), read_opts)); + static_cast(chunk_read_limit), read_opts)); } CATCH_STD(env, 0); } From 04e1320dfcafa73bd13a1e5abfc3641a5e6b9322 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 25 Oct 2022 06:16:23 -0700 Subject: [PATCH 091/162] More cleanup Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl_helpers.cu | 44 ----------- cpp/src/io/parquet/reader_impl_helpers.cuh | 1 + cpp/src/io/parquet/reader_impl_preprocess.cu | 83 ++++++++++---------- 3 files changed, 44 insertions(+), 84 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl_helpers.cu b/cpp/src/io/parquet/reader_impl_helpers.cu index 9d155792a30..b519e4c8dc0 100644 --- a/cpp/src/io/parquet/reader_impl_helpers.cu +++ b/cpp/src/io/parquet/reader_impl_helpers.cu @@ -195,9 +195,6 @@ std::vector aggregate_reader_metadata::metadatas_from_sources( return metadatas; } -/** - * @brief Collect the keyvalue maps from each per-file metadata object into a vector of maps. - */ std::vector> aggregate_reader_metadata::collect_keyval_metadata() { @@ -219,9 +216,6 @@ aggregate_reader_metadata::collect_keyval_metadata() return kv_maps; } -/** - * @brief Sums up the number of rows of each source - */ size_type aggregate_reader_metadata::calc_num_rows() const { return std::accumulate( @@ -230,9 +224,6 @@ size_type aggregate_reader_metadata::calc_num_rows() const }); } -/** - * @brief Sums up the number of row groups of each source - */ size_type aggregate_reader_metadata::calc_num_row_groups() const { return std::accumulate( @@ -287,15 +278,6 @@ ColumnChunkMetaData const& aggregate_reader_metadata::get_column_metadata(size_t return col->meta_data; } -/** - * @brief Extracts the pandas "index_columns" section - * - * PANDAS adds its own metadata to the key_value section when writing out the - * dataframe to a file to aid in exact reconstruction. The JSON-formatted - * metadata contains the index column(s) and PANDA-specific datatypes. 
- * - * @return comma-separated index column names in quotes - */ std::string aggregate_reader_metadata::get_pandas_index() const { // Assumes that all input files have the same metadata @@ -324,11 +306,6 @@ std::string aggregate_reader_metadata::get_pandas_index() const return ""; } -/** - * @brief Extracts the column name(s) used for the row indexes in a dataframe - * - * @param names List of column names to load, where index column name(s) will be added - */ std::vector aggregate_reader_metadata::get_pandas_index_names() const { std::vector names; @@ -349,15 +326,6 @@ std::vector aggregate_reader_metadata::get_pandas_index_names() con return names; } -/** - * @brief Filters and reduces down to a selection of row groups - * - * @param row_groups Lists of row groups to read, one per source - * @param row_start Starting row of the selection - * @param row_count Total number of rows selected - * - * @return List of row group indexes and its starting row - */ std::vector aggregate_reader_metadata::select_row_groups( std::vector> const& row_groups, size_type& row_start, @@ -407,18 +375,6 @@ std::vector aggregate_reader_metadata return selection; } -/** - * @brief Filters and reduces down to a selection of columns - * - * @param use_names List of paths of column names to select; `nullopt` if user did not select - * columns to read - * @param include_index Whether to always include the PANDAS index column(s) - * @param strings_to_categorical Type conversion parameter - * @param timestamp_type_id Type conversion parameter - * - * @return input column information, output column information, list of output column schema - * indices - */ std::tuple, std::vector, std::vector> aggregate_reader_metadata::select_columns(std::optional> const& use_names, bool include_index, diff --git a/cpp/src/io/parquet/reader_impl_helpers.cuh b/cpp/src/io/parquet/reader_impl_helpers.cuh index 76336602a89..b2682a55249 100644 --- a/cpp/src/io/parquet/reader_impl_helpers.cuh +++ b/cpp/src/io/parquet/reader_impl_helpers.cuh @@ -62,6 +62,7 @@ class aggregate_reader_metadata { std::vector> keyval_maps; size_type num_rows; size_type num_row_groups; + /** * @brief Create a metadata object from each element in the source vector */ diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index 0a3e6ed86a8..bce3c3e4609 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -41,46 +41,6 @@ namespace cudf::io::detail::parquet { namespace { -/** - * @brief Function that returns the required the number of bits to store a value - */ -template -T required_bits(uint32_t max_level) -{ - return static_cast(CompactProtocolReader::NumRequiredBits(max_level)); -} - -/** - * @brief Converts cuDF units to Parquet units. - * - * @return A tuple of Parquet type width, Parquet clock rate and Parquet decimal type. - */ -std::tuple conversion_info(type_id column_type_id, - type_id timestamp_type_id, - parquet::Type physical, - int8_t converted, - int32_t length) -{ - int32_t type_width = (physical == parquet::FIXED_LEN_BYTE_ARRAY) ? 
length : 0; - int32_t clock_rate = 0; - if (column_type_id == type_id::INT8 or column_type_id == type_id::UINT8) { - type_width = 1; // I32 -> I8 - } else if (column_type_id == type_id::INT16 or column_type_id == type_id::UINT16) { - type_width = 2; // I32 -> I16 - } else if (column_type_id == type_id::INT32) { - type_width = 4; // str -> hash32 - } else if (is_chrono(data_type{column_type_id})) { - clock_rate = to_clockrate(timestamp_type_id); - } - - int8_t converted_type = converted; - if (converted_type == parquet::DECIMAL && column_type_id != type_id::FLOAT64 && - not cudf::is_fixed_point(data_type{column_type_id})) { - converted_type = parquet::UNKNOWN; // Not converting to float64 or decimal - } - return std::make_tuple(type_width, clock_rate, converted_type); -} - #if defined(PREPROCESS_DEBUG) void print_pages(hostdevice_vector& pages, rmm::cuda_stream_view _stream) { @@ -719,6 +679,49 @@ void reader::impl::allocate_columns(hostdevice_vector& chu } } +namespace { +/** + * @brief Function that returns the required the number of bits to store a value + */ +template +T required_bits(uint32_t max_level) +{ + return static_cast(CompactProtocolReader::NumRequiredBits(max_level)); +} + +/** + * @brief Converts cuDF units to Parquet units. + * + * @return A tuple of Parquet type width, Parquet clock rate and Parquet decimal type. + */ +std::tuple conversion_info(type_id column_type_id, + type_id timestamp_type_id, + parquet::Type physical, + int8_t converted, + int32_t length) +{ + int32_t type_width = (physical == parquet::FIXED_LEN_BYTE_ARRAY) ? length : 0; + int32_t clock_rate = 0; + if (column_type_id == type_id::INT8 or column_type_id == type_id::UINT8) { + type_width = 1; // I32 -> I8 + } else if (column_type_id == type_id::INT16 or column_type_id == type_id::UINT16) { + type_width = 2; // I32 -> I16 + } else if (column_type_id == type_id::INT32) { + type_width = 4; // str -> hash32 + } else if (is_chrono(data_type{column_type_id})) { + clock_rate = to_clockrate(timestamp_type_id); + } + + int8_t converted_type = converted; + if (converted_type == parquet::DECIMAL && column_type_id != type_id::FLOAT64 && + not cudf::is_fixed_point(data_type{column_type_id})) { + converted_type = parquet::UNKNOWN; // Not converting to float64 or decimal + } + return std::make_tuple(type_width, clock_rate, converted_type); +} + +} // namespace + std::pair reader::impl::preprocess_file( size_type skip_rows, size_type num_rows, From 395413d9285b58622fec4d11ef2a32f8248ee9e1 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 25 Oct 2022 09:50:29 -0700 Subject: [PATCH 092/162] Rewrite docs for `parquet.hpp` files Signed-off-by: Nghia Truong --- cpp/include/cudf/io/detail/parquet.hpp | 29 +++++++++++++------------- cpp/include/cudf/io/parquet.hpp | 2 +- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/cpp/include/cudf/io/detail/parquet.hpp b/cpp/include/cudf/io/detail/parquet.hpp index 5c5358d35a3..0b3247a1d0a 100644 --- a/cpp/include/cudf/io/detail/parquet.hpp +++ b/cpp/include/cudf/io/detail/parquet.hpp @@ -49,6 +49,11 @@ class reader { class impl; std::unique_ptr _impl; + /** + * @brief Default constructor, needed for subclassing. 
+ */
+  reader();
+
  public:
   /**
    * @brief Constructor from an array of datasources
@@ -75,21 +80,21 @@ class reader {
    *
    * @return The set of columns along with table metadata
    */
-  virtual table_with_metadata read(parquet_reader_options const& options);
+  table_with_metadata read(parquet_reader_options const& options);
 };
 
 /**
- * TODO
+ * @brief The reader class that supports chunked reading of a given file.
  *
- * @brief The chunked_reader class
+ * This class intentionally subclasses the `reader` class with private inheritance to hide the
+ * `reader::read()` API. As such, only chunked reading APIs are supported.
  */
 class chunked_reader : reader {
  public:
   /**
-   * TODO
-   *
-   * @brief Constructor from an array of datasources
+   * @brief Constructor from a read limit and an array of data sources with reader options.
    *
+   * @param chunk_read_limit The byte size limit to read each chunk
    * @param sources Input `datasource` objects to read the dataset from
    * @param options Settings for controlling reading behavior
    * @param stream CUDA stream used for device memory operations and kernel launches.
@@ -102,23 +107,17 @@ class chunked_reader : reader {
                  rmm::mr::device_memory_resource* mr);
 
   /**
-   * @brief Destructor explicitly-declared to avoid inlined in header
+   * @brief Destructor explicitly declared to avoid being inlined in the header.
    */
   ~chunked_reader();
 
   /**
-   * TODO
-   *
-   * @brief has_next
-   * @return
+   * @copydoc cudf::io::chunked_parquet_reader::has_next
    */
   bool has_next();
 
   /**
-   * TODO
-   *
-   * @brief read_chunk
-   * @return
+   * @copydoc cudf::io::chunked_parquet_reader::read_chunk
    */
   table_with_metadata read_chunk();
 };
diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp
index 66f4fc5b27f..874c593450f 100644
--- a/cpp/include/cudf/io/parquet.hpp
+++ b/cpp/include/cudf/io/parquet.hpp
@@ -445,7 +445,7 @@ class chunked_parquet_reader {
   ~chunked_parquet_reader();
 
   /**
-   * @brief Check if there is any data of the given file has not yet processed.
+   * @brief Check if there is any data in the given file that has not yet been read.
* * @return A boolean value indicating if there is any data left to read */ From e3e19e8dd36009e2315288f7c6ff78906c277b48 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 25 Oct 2022 10:05:14 -0700 Subject: [PATCH 093/162] Extract functions for `reader` and `chunked_reader` Signed-off-by: Nghia Truong --- cpp/CMakeLists.txt | 1 + cpp/include/cudf/io/detail/parquet.hpp | 12 ++--- cpp/include/cudf/io/parquet.hpp | 7 ++- cpp/src/io/parquet/reader.cu | 61 ++++++++++++++++++++++ cpp/src/io/parquet/reader_impl.cu | 72 ++++++-------------------- cpp/src/io/parquet/reader_impl.hpp | 27 +++++----- 6 files changed, 99 insertions(+), 81 deletions(-) create mode 100644 cpp/src/io/parquet/reader.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 1c6fde0a11a..2e2f655b075 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -349,6 +349,7 @@ add_library( src/io/parquet/chunk_dict.cu src/io/parquet/page_enc.cu src/io/parquet/page_hdr.cu + src/io/parquet/reader.cu src/io/parquet/reader_impl.cu src/io/parquet/reader_impl_helpers.cu src/io/parquet/reader_impl_preprocess.cu diff --git a/cpp/include/cudf/io/detail/parquet.hpp b/cpp/include/cudf/io/detail/parquet.hpp index 0b3247a1d0a..14fe5847e1f 100644 --- a/cpp/include/cudf/io/detail/parquet.hpp +++ b/cpp/include/cudf/io/detail/parquet.hpp @@ -30,16 +30,14 @@ #include #include -namespace cudf { -namespace io { +namespace cudf::io { // Forward declaration class parquet_reader_options; class parquet_writer_options; class chunked_parquet_writer_options; -namespace detail { -namespace parquet { +namespace detail::parquet { /** * @brief Class to read Parquet dataset data into columns. @@ -198,7 +196,5 @@ class writer { const std::vector>>& metadata_list); }; -}; // namespace parquet -}; // namespace detail -}; // namespace io -}; // namespace cudf +} // namespace detail::parquet +} // namespace cudf::io diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index 874c593450f..706c909ecbe 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -30,8 +30,7 @@ #include #include -namespace cudf { -namespace io { +namespace cudf::io { /** * @addtogroup io_readers * @{ @@ -1520,5 +1519,5 @@ class parquet_chunked_writer { }; /** @} */ // end of group -} // namespace io -} // namespace cudf + +} // namespace cudf::io diff --git a/cpp/src/io/parquet/reader.cu b/cpp/src/io/parquet/reader.cu new file mode 100644 index 00000000000..145a941ae47 --- /dev/null +++ b/cpp/src/io/parquet/reader.cu @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "reader_impl.hpp" +#include "reader_impl_helpers.cuh" + +#include + +namespace cudf::io::detail::parquet { + +reader::reader() = default; + +reader::reader(std::vector>&& sources, + parquet_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + : _impl(std::make_unique(std::move(sources), options, stream, mr)) +{ +} + +reader::~reader() = default; + +table_with_metadata reader::read(parquet_reader_options const& options) +{ + // if the user has specified custom row bounds + bool const uses_custom_row_bounds = options.get_num_rows() >= 0 || options.get_skip_rows() != 0; + return _impl->read(options.get_skip_rows(), + options.get_num_rows(), + uses_custom_row_bounds, + options.get_row_groups()); +} + +chunked_reader::chunked_reader(std::size_t chunk_read_limit, + std::vector>&& sources, + parquet_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + _impl = std::make_unique(chunk_read_limit, std::move(sources), options, stream, mr); +} + +chunked_reader::~chunked_reader() = default; + +bool chunked_reader::has_next() { return _impl->has_next(); } + +table_with_metadata chunked_reader::read_chunk() { return _impl->read_chunk(); } + +} // namespace cudf::io::detail::parquet diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index fa49e60b742..cda05c54707 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -14,11 +14,6 @@ * limitations under the License. */ -/** - * @file reader_impl.cu - * @brief cuDF-IO Parquet reader class implementation - */ - #include "reader_impl.hpp" #include "reader_impl_helpers.cuh" @@ -35,10 +30,7 @@ #include #include -namespace cudf { -namespace io { -namespace detail { -namespace parquet { +namespace cudf::io::detail::parquet { namespace { @@ -771,6 +763,19 @@ reader::impl::impl(std::vector>&& sources, _timestamp_type.id()); } +reader::impl::impl(std::size_t chunk_read_limit, + std::vector>&& sources, + parquet_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + : impl(std::forward>>(sources), + options, + stream, + mr) +{ + _chunk_read_limit = chunk_read_limit; +} + void reader::impl::preprocess_file_and_columns( size_type skip_rows, size_type num_rows, @@ -910,51 +915,4 @@ bool reader::impl::has_next() return _current_read_chunk < _chunk_read_info.size(); } -// Forward to implementation -reader::reader(std::vector>&& sources, - parquet_reader_options const& options, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) - : _impl(std::make_unique(std::move(sources), options, stream, mr)) -{ -} - -// Destructor within this translation unit -reader::~reader() = default; - -// Forward to implementation -table_with_metadata reader::read(parquet_reader_options const& options) -{ - // if the user has specified custom row bounds - bool const uses_custom_row_bounds = options.get_num_rows() >= 0 || options.get_skip_rows() != 0; - return _impl->read(options.get_skip_rows(), - options.get_num_rows(), - uses_custom_row_bounds, - options.get_row_groups()); -} - -// Forward to implementation -chunked_reader::chunked_reader(std::size_t chunk_read_limit, - std::vector>&& sources, - parquet_reader_options const& options, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) - : reader(std::forward>>(sources), - options, - stream, - mr) -{ - _impl->set_chunk_read_limit(chunk_read_limit); -} - -// Destructor 
within this translation unit -chunked_reader::~chunked_reader() = default; - -bool chunked_reader::has_next() { return _impl->has_next(); } - -table_with_metadata chunked_reader::read_chunk() { return _impl->read_chunk(); } - -} // namespace parquet -} // namespace detail -} // namespace io -} // namespace cudf +} // namespace cudf::io::detail::parquet diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index 2d54b8984e8..4d5f07844db 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -38,10 +38,8 @@ #include #include -namespace cudf { -namespace io { -namespace detail { -namespace parquet { +namespace cudf::io::detail::parquet { + using namespace cudf::io::parquet; using namespace cudf::io; @@ -83,11 +81,19 @@ class reader::impl { std::vector> const& row_group_indices); /** - * @brief set_chunk_read_limit - * // TODO - * @param chunk_read_limit + * @brief Constructor from a chunk read limit and an array of dataset sources with reader options. + * + * @param chunk_read_limit The byte size limit to read each chunk + * @param sources Dataset sources + * @param options Settings for controlling reading behavior + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource to use for device memory allocation */ - void set_chunk_read_limit(std::size_t chunk_read_limit) { _chunk_read_limit = chunk_read_limit; } + explicit impl(std::size_t chunk_read_limit, + std::vector>&& sources, + parquet_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * TODO @@ -293,7 +299,4 @@ class reader::impl { parquet_reader_options const _options; }; -} // namespace parquet -} // namespace detail -} // namespace io -} // namespace cudf +} // namespace cudf::io::detail::parquet From 52339da2dbca201500258bb698397760ad47493a Mon Sep 17 00:00:00 2001 From: Dave Baranec Date: Tue, 25 Oct 2022 12:07:38 -0500 Subject: [PATCH 094/162] Fix issues with string length computation. --- cpp/src/io/parquet/page_data.cu | 128 +++++++++++++++--------- cpp/src/io/parquet/reader_impl.cu | 7 +- cpp/src/io/parquet/reader_preprocess.cu | 2 +- 3 files changed, 88 insertions(+), 49 deletions(-) diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index 2bab44cef52..9c25fe1e7a1 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -287,11 +287,13 @@ __device__ void gpuDecodeStream( * * @return The new output position */ -__device__ int gpuDecodeDictionaryIndices(volatile page_state_s* s, int target_pos, int t) +template +__device__ std::pair gpuDecodeDictionaryIndices(volatile page_state_s* s, int target_pos, int t) { const uint8_t* end = s->data_end; int dict_bits = s->dict_bits; int pos = s->dict_pos; + int str_len = 0; while (pos < target_pos) { int is_literal, batch_len; @@ -336,6 +338,8 @@ __device__ int gpuDecodeDictionaryIndices(volatile page_state_s* s, int target_p __syncwarp(); is_literal = shuffle(is_literal); batch_len = shuffle(batch_len); + + int len = 0; if (t < batch_len) { int dict_idx = s->dict_val; if (is_literal) { @@ -357,11 +361,39 @@ __device__ int gpuDecodeDictionaryIndices(volatile page_state_s* s, int target_p dict_idx &= (1 << dict_bits) - 1; } } - s->dict_idx[(pos + t) & (non_zero_buffer_size - 1)] = dict_idx; + + // if we're computing indices, store it off. + if constexpr(sizes_only){ + len = [&](){ + // we may end up decoding more indices than we asked for. 
so don't include those in the size + // calculation + if(pos + t >= target_pos){ + return 0; + } + // TODO: refactor this with gpuGetStringData / gpuGetStringSize + uint32_t const dict_pos = (s->dict_bits > 0) ? dict_idx * sizeof(string_index_pair) : 0; + if (target_pos && dict_pos < (uint32_t)s->dict_size) { + const auto* src = reinterpret_cast(s->dict_base + dict_pos); + return src->second; + } + return 0; + }(); + } else { + s->dict_idx[(pos + t) & (non_zero_buffer_size - 1)] = dict_idx; + } + } + + // if we're computing sizes, sum it + if constexpr(sizes_only){ + typedef cub::WarpReduce WarpReduce; + __shared__ typename WarpReduce::TempStorage temp_storage; + // note: str_len will only be valid on thread 0. + str_len += WarpReduce(temp_storage).Sum(len); } + pos += batch_len; } - return pos; + return {pos, str_len}; } /** @@ -436,9 +468,11 @@ __device__ int gpuDecodeRleBooleans(volatile page_state_s* s, int target_pos, in * * @return The new output position */ -__device__ void gpuInitStringDescriptors(volatile page_state_s* s, int target_pos, int t) -{ +__device__ size_type gpuInitStringDescriptors(volatile page_state_s* s, int target_pos, int t) +{ int pos = s->dict_pos; + int total_len = 0; + // This step is purely serial if (!t) { const uint8_t* cur = s->data_start; @@ -457,11 +491,14 @@ __device__ void gpuInitStringDescriptors(volatile page_state_s* s, int target_po s->dict_idx[pos & (non_zero_buffer_size - 1)] = k; s->str_len[pos & (non_zero_buffer_size - 1)] = len; k += len; + total_len += len; pos++; } s->dict_val = k; __threadfence_block(); } + + return total_len; } inline __device__ std::pair gpuGetStringData(volatile page_state_s* s, @@ -500,7 +537,7 @@ inline __device__ std::pair gpuGetStringData(volatile page_ * * @return The length of the string */ -inline __device__ size_type gpuGetStringSize(volatile page_state_s* s, int src_pos) +inline __device__ size_t gpuGetStringSize(volatile page_state_s* s, int src_pos) { if (s->dtype_len == 4) { return 4; } auto [_, len] = gpuGetStringData(s, src_pos); @@ -1423,7 +1460,6 @@ __device__ void gpuDecodeLevels(page_state_s* s, int32_t target_leaf_count, int * @param[in] bounds_set Whether or not s->row_index_lower_bound, s->first_row and s->num_rows * have been computed for this page (they will only be set in the second/trim pass). */ -template static __device__ void gpuUpdatePageSizes(page_state_s* s, int32_t target_input_value_count, int t, @@ -1437,6 +1473,9 @@ static __device__ void gpuUpdatePageSizes(page_state_s* s, int input_leaf_count = s->input_leaf_count; // how many rows we've processed in the page so far int input_row_count = s->input_row_count; + + // how many valid leaves we've processed + // int input_leaf_valid_count = s->input_leaf_valid_count; while (input_value_count < target_input_value_count) { int start_depth, end_depth, d; @@ -1475,39 +1514,25 @@ static __device__ void gpuUpdatePageSizes(page_state_s* s, } } - // increment counts across all nesting depths + // increment value counts across all nesting depths for (int s_idx = 0; s_idx < max_depth; s_idx++) { + PageNestingInfo* pni = &s->page.nesting[s_idx]; + // if we are within the range of nesting levels we should be adding value indices for int const in_nesting_bounds = (s_idx >= start_depth && s_idx <= end_depth && in_row_bounds) ? 
1 : 0; uint32_t const count_mask = ballot(in_nesting_bounds); if (!t) { - s->page.nesting[s_idx].size += __popc(count_mask); + pni->size += __popc(count_mask); } - // string lengths, if applicable - if constexpr (is_string_column) { - if (s_idx == max_depth - 1) { - // string len for each thread - size_type const str_len = [&]() { - if (is_new_leaf) { - int const src_pos = input_leaf_count + __popc(warp_leaf_count_mask & ((1 << t) - 1)); - auto const str_len = gpuGetStringSize(s, src_pos); - return str_len; - } - return 0; - }(); - - // sum sizes from all threads. - using warp_reduce = cub::WarpReduce; - __shared__ typename warp_reduce::TempStorage temp_storage[1]; - size_type warp_total_str_len = warp_reduce(temp_storage[0]).Sum(str_len); - if (!t) { - s->page.str_bytes += warp_total_str_len; - // printf("STR BYTES: %d\n", s->page.str_bytes); - } - } + /* + if (s_idx == max_depth - 1) { + bool const is_valid = is_new_leaf && in_nesting_bounds; + uint32_t const warp_leaf_valid_mask = ballot(is_valid); + input_leaf_valid_count += __popc(warp_leaf_valid_mask); } + */ } input_value_count += min(32, (target_input_value_count - input_value_count)); @@ -1519,10 +1544,26 @@ static __device__ void gpuUpdatePageSizes(page_state_s* s, if (!t) { s->input_value_count = target_input_value_count; s->input_leaf_count = input_leaf_count; + // s->input_leaf_valid_count = input_leaf_valid_count; s->input_row_count = input_row_count; } } +__device__ size_type gpuGetStringSizes(page_state_s* s, int target_count, int t) +{ + auto dict_target_pos = target_count; + size_type str_len = 0; + if (s->dict_base) { + auto const[new_target_pos, len] = gpuDecodeDictionaryIndices(s, target_count, t); + dict_target_pos = new_target_pos; + str_len = len; + } else if ((s->col.data_type & 7) == BYTE_ARRAY) { + str_len = gpuInitStringDescriptors(s, target_count, t); + } + if (!t) { *(volatile int32_t*)&s->dict_pos = dict_target_pos; } + return str_len; +} + /** * @brief Kernel for computing per-page column size information for all nesting levels. * @@ -1621,26 +1662,17 @@ __global__ void __launch_bounds__(block_size) int actual_input_count = has_repetition ? 
min(s->lvl_count[level_type::REPETITION], s->lvl_count[level_type::DEFINITION]) : s->lvl_count[level_type::DEFINITION]; - actual_input_count = min(actual_input_count, s->num_input_values); - + // process what we got back + gpuUpdatePageSizes(s, actual_input_count, t, !compute_num_rows_pass); if (compute_string_sizes) { - auto src_target_pos = target_input_count; - // TODO: compute this in another warp like the decode step does - if (s->dict_base) { - src_target_pos = gpuDecodeDictionaryIndices(s, src_target_pos, t); - } else if ((s->col.data_type & 7) == BYTE_ARRAY) { - gpuInitStringDescriptors(s, src_target_pos, t); + auto const str_len = gpuGetStringSizes(s, s->input_leaf_count, t); + if(!t){ + s->page.str_bytes += str_len; } - if (!t) { *(volatile int32_t*)&s->dict_pos = src_target_pos; } - - gpuUpdatePageSizes(s, actual_input_count, t, !compute_num_rows_pass); - } else { - gpuUpdatePageSizes(s, actual_input_count, t, !compute_num_rows_pass); } - // target_input_count = actual_input_count + batch_size; - target_input_count += batch_size; + target_input_count = actual_input_count + batch_size; __syncwarp(); } } @@ -1652,6 +1684,7 @@ __global__ void __launch_bounds__(block_size) pp->skipped_values = s->page.skipped_values; pp->skipped_leaf_values = s->page.skipped_leaf_values; pp->str_bytes = s->page.str_bytes; + // printf("STR BYTES: %d\n", s->page.str_bytes); } } @@ -1720,7 +1753,8 @@ __global__ void __launch_bounds__(block_size) gpuDecodePageData( // WARP1: Decode dictionary indices, booleans or string positions if (s->dict_base) { - src_target_pos = gpuDecodeDictionaryIndices(s, src_target_pos, t & 0x1f); + auto const[new_target_pos, _] = gpuDecodeDictionaryIndices(s, src_target_pos, t & 0x1f); + src_target_pos = new_target_pos; } else if ((s->col.data_type & 7) == BOOLEAN) { src_target_pos = gpuDecodeRleBooleans(s, src_target_pos, t & 0x1f); } else if ((s->col.data_type & 7) == BYTE_ARRAY) { diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 24888073701..03594ebfee3 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1838,15 +1838,20 @@ table_with_metadata reader::impl::finalize_output(table_metadata& out_metadata, return {std::make_unique
(std::move(out_columns)), std::move(out_metadata)}; } +// #define ALLOW_PLAIN_READ_CHUNK_LIMIT table_with_metadata reader::impl::read(size_type skip_rows, size_type num_rows, bool uses_custom_row_bounds, std::vector> const& row_group_list) { +#if defined(ALLOW_PLAIN_READ_CHUNK_LIMIT) + preprocess_file_and_columns(skip_rows, num_rows, uses_custom_row_bounds || _chunk_read_limit > 0, row_group_list); + return read_chunk_internal(uses_custom_row_bounds || _chunk_read_limit > 0); +#else CUDF_EXPECTS(_chunk_read_limit == 0, "Reading the whole file must not have non-zero byte_limit."); - preprocess_file_and_columns(skip_rows, num_rows, uses_custom_row_bounds, row_group_list); return read_chunk_internal(uses_custom_row_bounds); +#endif } table_with_metadata reader::impl::read_chunk() diff --git a/cpp/src/io/parquet/reader_preprocess.cu b/cpp/src/io/parquet/reader_preprocess.cu index 9a6a2388da8..a87cbff7ca1 100644 --- a/cpp/src/io/parquet/reader_preprocess.cu +++ b/cpp/src/io/parquet/reader_preprocess.cu @@ -182,7 +182,7 @@ std::vector compute_splits(hostdevice_vector Date: Tue, 25 Oct 2022 10:10:43 -0700 Subject: [PATCH 095/162] Remove redundant changes Signed-off-by: Nghia Truong --- cpp/include/cudf/io/parquet.hpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index 706c909ecbe..426f6da6209 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -49,7 +49,6 @@ class parquet_reader_options_builder; * @brief Settings for `read_parquet()`. */ class parquet_reader_options { - protected: source_info _source; // Path in schema of column to read; `nullopt` is all @@ -88,11 +87,6 @@ class parquet_reader_options { */ explicit parquet_reader_options() = default; - /** - * @brief Default destructor, needs to be virtual for polymorphism. - */ - virtual ~parquet_reader_options() = default; - /** * @brief Creates a parquet_reader_options_builder which will build parquet_reader_options. * @@ -257,7 +251,6 @@ class parquet_reader_options { * @brief Builds parquet_reader_options to use for `read_parquet()`. 
*/ class parquet_reader_options_builder { - protected: parquet_reader_options options; public: From 02ccdecb1293aa3c6507766c5d7527ba7c26d998 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 25 Oct 2022 11:13:51 -0700 Subject: [PATCH 096/162] Add simple structs test Signed-off-by: Nghia Truong --- cpp/tests/io/parquet_chunked_reader_test.cpp | 32 ++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/cpp/tests/io/parquet_chunked_reader_test.cpp b/cpp/tests/io/parquet_chunked_reader_test.cpp index 9f61e43f076..358e6fd7d61 100644 --- a/cpp/tests/io/parquet_chunked_reader_test.cpp +++ b/cpp/tests/io/parquet_chunked_reader_test.cpp @@ -53,6 +53,7 @@ auto const temp_env = static_cast( using int32s_col = cudf::test::fixed_width_column_wrapper; using int64s_col = cudf::test::fixed_width_column_wrapper; using strings_col = cudf::test::strings_column_wrapper; +using structs_col = cudf::test::structs_column_wrapper; auto chunked_read(std::string const& filepath, std::size_t byte_limit) { @@ -151,3 +152,34 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithString) CUDF_TEST_EXPECT_TABLES_EQUAL(input, result->view()); } } + +TEST_F(ParquetChunkedReaderTest, TestChunkedReadSimpleStructs) +{ + auto constexpr num_rows = 100'000; + auto const filepath = temp_env->get_temp_filepath("chunked_read_simple_structs.parquet"); + + auto const int_iter = thrust::make_counting_iterator(0); + auto const str_iter = + cudf::detail::make_counting_transform_iterator(0, [&](int32_t i) { return std::to_string(i); }); + + auto const a = int32s_col(int_iter, int_iter + num_rows); + auto const b = [=] { + auto child1 = int32s_col(int_iter, int_iter + num_rows); + auto child2 = int32s_col(int_iter + num_rows, int_iter + num_rows * 2); + auto child3 = strings_col{str_iter, str_iter + num_rows}; + return structs_col{{child1, child2, child3}}; + }(); + auto const input = cudf::table_view{{a, b}}; + + auto const write_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, input) + .max_page_size_bytes(512 * 1024) // 512KB per page + .max_page_size_rows(20000) // 20k rows per page + .build(); + cudf::io::write_parquet(write_opts); + + auto const [result, num_chunks] = chunked_read(filepath, 500'000); + EXPECT_EQ(num_chunks, 5); + + CUDF_TEST_EXPECT_TABLES_EQUAL(input, result->view()); +} From ee0ffad00e8371906c4fc403fd4f3906c25e2b13 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 25 Oct 2022 13:10:34 -0700 Subject: [PATCH 097/162] Rewrite tests Signed-off-by: Nghia Truong --- cpp/tests/io/parquet_chunked_reader_test.cpp | 222 +++++++++++++------ 1 file changed, 151 insertions(+), 71 deletions(-) diff --git a/cpp/tests/io/parquet_chunked_reader_test.cpp b/cpp/tests/io/parquet_chunked_reader_test.cpp index 358e6fd7d61..1867b84d8c4 100644 --- a/cpp/tests/io/parquet_chunked_reader_test.cpp +++ b/cpp/tests/io/parquet_chunked_reader_test.cpp @@ -23,9 +23,11 @@ #include #include +#include #include #include #include +#include #include #include #include @@ -55,6 +57,51 @@ using int64s_col = cudf::test::fixed_width_column_wrapper; using strings_col = cudf::test::strings_column_wrapper; using structs_col = cudf::test::structs_column_wrapper; +auto write_file(std::vector>& input_columns, + std::string const& filename, + bool nullable, + std::size_t max_page_size_bytes = cudf::io::default_max_page_size_bytes, + std::size_t max_page_size_rows = cudf::io::default_max_page_size_rows) +{ + // Generate deterministic bitmask instead of random bitmask for easy verification. 
+ auto const valid_iter = cudf::detail::make_counting_transform_iterator( + 0, [&](int32_t i) -> bool { return static_cast(i % 2); }); + + // Just shift nulls of the next column by one position to avoid having all nulls in the same + // table rows. + if (nullable) { + cudf::size_type offset{0}; + for (auto& col : input_columns) { + col->set_null_mask( + cudf::test::detail::make_null_mask(valid_iter + offset, valid_iter + col->size() + offset)); + + if (col->type().id() == cudf::type_id::STRUCT) { + auto const null_mask = col->view().null_mask(); + auto const null_count = col->null_count(); + for (cudf::size_type idx = 0; idx < col->num_children(); ++idx) { + cudf::structs::detail::superimpose_parent_nulls(null_mask, + null_count, + col->child(idx), + cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); + } + } + } + } + auto input_table = std::make_unique(std::move(input_columns)); + auto filepath = + temp_env->get_temp_filepath(nullable ? filename + "_nullable.parquet" : filename + ".parquet"); + + auto const write_opts = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *input_table) + .max_page_size_bytes(max_page_size_bytes) + .max_page_size_rows(max_page_size_rows) + .build(); + cudf::io::write_parquet(write_opts); + + return std::pair{std::move(input_table), std::move(filepath)}; +} + auto chunked_read(std::string const& filepath, std::size_t byte_limit) { auto const read_opts = @@ -85,101 +132,134 @@ struct ParquetChunkedReaderTest : public cudf::test::BaseFixture { TEST_F(ParquetChunkedReaderTest, TestChunkedReadSimpleData) { auto constexpr num_rows = 40'000; - auto const filepath = temp_env->get_temp_filepath("chunked_read_simple.parquet"); - auto const values = thrust::make_counting_iterator(0); - auto const a = int32s_col(values, values + num_rows); - auto const b = int64s_col(values, values + num_rows); - auto const input = cudf::table_view{{a, b}}; + auto const do_test = [](std::size_t chunk_read_limit, bool nullable) { + std::vector> input_columns; + auto const value_iter = thrust::make_counting_iterator(0); + input_columns.emplace_back(int32s_col(value_iter, value_iter + num_rows).release()); + input_columns.emplace_back(int64s_col(value_iter, value_iter + num_rows).release()); - auto const write_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, input).build(); - cudf::io::write_parquet(write_opts); + auto [input_table, filepath] = write_file(input_columns, "chunked_read_simple", nullable); + auto [result, num_chunks] = chunked_read(filepath, chunk_read_limit); + return std::tuple{std::move(input_table), std::move(result), num_chunks}; + }; - auto const [result, num_chunks] = chunked_read(filepath, 240'000); - EXPECT_EQ(num_chunks, 2); + { + auto const [input, result, num_chunks] = do_test(240'000, false); + EXPECT_EQ(num_chunks, 2); + CUDF_TEST_EXPECT_TABLES_EQUAL(*input, *result); + } - CUDF_TEST_EXPECT_TABLES_EQUAL(input, result->view()); + { + auto const [input, result, num_chunks] = do_test(240'000, true); + EXPECT_EQ(num_chunks, 2); + CUDF_TEST_EXPECT_TABLES_EQUAL(*input, *result); + } } TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithString) { auto constexpr num_rows = 60'000; - auto const filepath = temp_env->get_temp_filepath("chunked_read_with_strings.parquet"); - auto const values = thrust::make_counting_iterator(0); - - // ints Page total bytes cumulative bytes - // 20000 rows of 4 bytes each = A0 80000 80000 - // 20000 rows of 4 bytes each = A1 80000 160000 - // 20000 rows of 4 bytes 
each = A2 80000 240000 - auto const a = int32s_col(values, values + num_rows); - - // strings Page total bytes cumulative bytes - // 20000 rows of 1 char each (20000 + 80004) = B0 100004 100004 - // 20000 rows of 4 chars each (80000 + 80004) = B1 160004 260008 - // 20000 rows of 16 chars each (320000 + 80004) = B2 400004 660012 - auto const strings = std::vector{"a", "bbbb", "cccccccccccccccc"}; - auto const str_iter = cudf::detail::make_counting_transform_iterator(0, [&](int32_t i) { - if (i < 20000) { return strings[0]; } - if (i < 40000) { return strings[1]; } - return strings[2]; - }); - auto const b = strings_col{str_iter, str_iter + num_rows}; - auto const input = cudf::table_view{{a, b}}; - auto const write_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, input) - .max_page_size_bytes(512 * 1024) // 512KB per page - .max_page_size_rows(20000) // 20k rows per page - .build(); - cudf::io::write_parquet(write_opts); + auto const do_test = [](std::size_t chunk_read_limit, bool nullable) { + std::vector> input_columns; + auto const value_iter = thrust::make_counting_iterator(0); + + // ints Page total bytes cumulative bytes + // 20000 rows of 4 bytes each = A0 80000 80000 + // 20000 rows of 4 bytes each = A1 80000 160000 + // 20000 rows of 4 bytes each = A2 80000 240000 + input_columns.emplace_back(int32s_col(value_iter, value_iter + num_rows).release()); + + // strings Page total bytes cumulative bytes + // 20000 rows of 1 char each (20000 + 80004) = B0 100004 100004 + // 20000 rows of 4 chars each (80000 + 80004) = B1 160004 260008 + // 20000 rows of 16 chars each (320000 + 80004) = B2 400004 660012 + auto const strings = std::vector{"a", "bbbb", "cccccccccccccccc"}; + auto const str_iter = cudf::detail::make_counting_transform_iterator(0, [&](int32_t i) { + if (i < 20000) { return strings[0]; } + if (i < 40000) { return strings[1]; } + return strings[2]; + }); + input_columns.emplace_back(strings_col(str_iter, str_iter + num_rows).release()); + + auto [input_table, filepath] = write_file(input_columns, + "chunked_read_with_strings", + nullable, + 512 * 1024, // 512KB per page + 20000 // 20k rows per page + ); + + // Cumulative sizes: + // A0 + B0 : 180004 + // A1 + B1 : 420008 + // A2 + B2 : 900012 + // skip_rows / num_rows + // byte_limit==500000 should give 2 chunks: {0, 40000}, {40000, 20000} + // byte_limit==1000000 should give 1 chunks: {0, 60000}, + auto [result, num_chunks] = chunked_read(filepath, chunk_read_limit); + return std::tuple{std::move(input_table), std::move(result), num_chunks}; + }; - // Cumulative sizes: - // A0 + B0 : 180004 - // A1 + B1 : 420008 - // A2 + B2 : 900012 - // skip_rows / num_rows - // byte_limit==500000 should give 2 chunks: {0, 40000}, {40000, 20000} - // byte_limit==1000000 should give 1 chunks: {0, 60000}, { - auto const [result, num_chunks] = chunked_read(filepath, 500'000); + auto const [input, result, num_chunks] = do_test(500'000, false); EXPECT_EQ(num_chunks, 2); - CUDF_TEST_EXPECT_TABLES_EQUAL(input, result->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(*input, *result); } { - auto const [result, num_chunks] = chunked_read(filepath, 1'000'000); + auto const [input, result, num_chunks] = do_test(500'000, true); + EXPECT_EQ(num_chunks, 2); + CUDF_TEST_EXPECT_TABLES_EQUAL(*input, *result); + } + + { + auto const [input, result, num_chunks] = do_test(1'000'000, false); EXPECT_EQ(num_chunks, 1); - CUDF_TEST_EXPECT_TABLES_EQUAL(input, result->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(*input, *result); + } + { + auto 
const [input, result, num_chunks] = do_test(1'000'000, true); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*input, *result); } } TEST_F(ParquetChunkedReaderTest, TestChunkedReadSimpleStructs) { auto constexpr num_rows = 100'000; - auto const filepath = temp_env->get_temp_filepath("chunked_read_simple_structs.parquet"); - - auto const int_iter = thrust::make_counting_iterator(0); - auto const str_iter = - cudf::detail::make_counting_transform_iterator(0, [&](int32_t i) { return std::to_string(i); }); - auto const a = int32s_col(int_iter, int_iter + num_rows); - auto const b = [=] { - auto child1 = int32s_col(int_iter, int_iter + num_rows); - auto child2 = int32s_col(int_iter + num_rows, int_iter + num_rows * 2); - auto child3 = strings_col{str_iter, str_iter + num_rows}; - return structs_col{{child1, child2, child3}}; - }(); - auto const input = cudf::table_view{{a, b}}; + auto const do_test = [](std::size_t chunk_read_limit, bool nullable) { + std::vector> input_columns; + auto const int_iter = thrust::make_counting_iterator(0); + auto const str_iter = cudf::detail::make_counting_transform_iterator( + 0, [&](int32_t i) { return std::to_string(i); }); + input_columns.emplace_back(int32s_col(int_iter, int_iter + num_rows).release()); + input_columns.emplace_back([=] { + auto child1 = int32s_col(int_iter, int_iter + num_rows); + auto child2 = int32s_col(int_iter + num_rows, int_iter + num_rows * 2); + auto child3 = strings_col{str_iter, str_iter + num_rows}; + return structs_col{{child1, child2, child3}}.release(); + }()); - auto const write_opts = - cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, input) - .max_page_size_bytes(512 * 1024) // 512KB per page - .max_page_size_rows(20000) // 20k rows per page - .build(); - cudf::io::write_parquet(write_opts); + auto [input_table, filepath] = write_file(input_columns, + "chunked_read_simple_structs", + nullable, + 512 * 1024, // 512KB per page + 20000 // 20k rows per page + ); + auto [result, num_chunks] = chunked_read(filepath, chunk_read_limit); + return std::tuple{std::move(input_table), std::move(result), num_chunks}; + }; - auto const [result, num_chunks] = chunked_read(filepath, 500'000); - EXPECT_EQ(num_chunks, 5); + { + auto const [input, result, num_chunks] = do_test(500'000, false); + EXPECT_EQ(num_chunks, 5); + CUDF_TEST_EXPECT_TABLES_EQUAL(*input, *result); + } - CUDF_TEST_EXPECT_TABLES_EQUAL(input, result->view()); + { + auto const [input, result, num_chunks] = do_test(500'000, true); + EXPECT_EQ(num_chunks, 5); + CUDF_TEST_EXPECT_TABLES_EQUAL(*input, *result); + } } From c149d644819a7cb51da4884b34569d8cb2d0f653 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 25 Oct 2022 14:19:55 -0700 Subject: [PATCH 098/162] Add lists test Signed-off-by: Nghia Truong --- cpp/tests/io/parquet_chunked_reader_test.cpp | 55 +++++++++++++++++--- 1 file changed, 49 insertions(+), 6 deletions(-) diff --git a/cpp/tests/io/parquet_chunked_reader_test.cpp b/cpp/tests/io/parquet_chunked_reader_test.cpp index 1867b84d8c4..f12e804c676 100644 --- a/cpp/tests/io/parquet_chunked_reader_test.cpp +++ b/cpp/tests/io/parquet_chunked_reader_test.cpp @@ -52,10 +52,11 @@ namespace { auto const temp_env = static_cast( ::testing::AddGlobalTestEnvironment(new cudf::test::TempDirTestEnvironment)); -using int32s_col = cudf::test::fixed_width_column_wrapper; -using int64s_col = cudf::test::fixed_width_column_wrapper; -using strings_col = cudf::test::strings_column_wrapper; -using structs_col = cudf::test::structs_column_wrapper; 
+using int32s_col = cudf::test::fixed_width_column_wrapper; +using int64s_col = cudf::test::fixed_width_column_wrapper; +using strings_col = cudf::test::strings_column_wrapper; +using structs_col = cudf::test::structs_column_wrapper; +using int32s_lists_col = cudf::test::lists_column_wrapper; auto write_file(std::vector>& input_columns, std::string const& filename, @@ -85,6 +86,8 @@ auto write_file(std::vector>& input_columns, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); } + } else if (col->type().id() == cudf::type_id::LIST) { + col = cudf::purge_nonempty_nulls(cudf::lists_column_view{col->view()}); } } } @@ -224,7 +227,7 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithString) } } -TEST_F(ParquetChunkedReaderTest, TestChunkedReadSimpleStructs) +TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithStructs) { auto constexpr num_rows = 100'000; @@ -242,7 +245,7 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadSimpleStructs) }()); auto [input_table, filepath] = write_file(input_columns, - "chunked_read_simple_structs", + "chunked_read_with_structs", nullable, 512 * 1024, // 512KB per page 20000 // 20k rows per page @@ -263,3 +266,43 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadSimpleStructs) CUDF_TEST_EXPECT_TABLES_EQUAL(*input, *result); } } + +TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithLists) +{ + auto constexpr num_rows = 100'000; + + auto const do_test = [](std::size_t chunk_read_limit, bool nullable) { + std::vector> input_columns; + auto const int_iter = thrust::make_counting_iterator(0); + input_columns.emplace_back(int32s_col(int_iter, int_iter + num_rows).release()); + + auto const template_lists = int32s_lists_col{ + int32s_lists_col{}, int32s_lists_col{0}, int32s_lists_col{0, 1}, int32s_lists_col{0, 1, 2}}; + auto const gather_iter = + cudf::detail::make_counting_transform_iterator(0, [&](int32_t i) { return i % 4; }); + auto const gather_map = int32s_col(gather_iter, gather_iter + num_rows); + input_columns.emplace_back( + std::move(cudf::gather(cudf::table_view{{template_lists}}, gather_map)->release().front())); + + auto [input_table, filepath] = write_file(input_columns, + "chunked_read_with_lists", + nullable, + 512 * 1024, // 512KB per page + 20000 // 20k rows per page + ); + auto [result, num_chunks] = chunked_read(filepath, chunk_read_limit); + return std::tuple{std::move(input_table), std::move(result), num_chunks}; + }; + + { + auto const [input, result, num_chunks] = do_test(400'000, false); + EXPECT_EQ(num_chunks, 3); + CUDF_TEST_EXPECT_TABLES_EQUAL(*input, *result); + } + + { + auto const [input, result, num_chunks] = do_test(400'000, true); + EXPECT_EQ(num_chunks, 3); + CUDF_TEST_EXPECT_TABLES_EQUAL(*input, *result); + } +} From 9335bb779c12d3ee8875b869763adb300aebda5c Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 25 Oct 2022 15:13:42 -0700 Subject: [PATCH 099/162] MISC Signed-off-by: Nghia Truong --- cpp/tests/io/parquet_chunked_reader_test.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/tests/io/parquet_chunked_reader_test.cpp b/cpp/tests/io/parquet_chunked_reader_test.cpp index f12e804c676..d9af3776be2 100644 --- a/cpp/tests/io/parquet_chunked_reader_test.cpp +++ b/cpp/tests/io/parquet_chunked_reader_test.cpp @@ -58,6 +58,7 @@ using strings_col = cudf::test::strings_column_wrapper; using structs_col = cudf::test::structs_column_wrapper; using int32s_lists_col = cudf::test::lists_column_wrapper; +// TODO: Remove the last 2 params auto write_file(std::vector>& input_columns, std::string const& 
filename, bool nullable, From 3769fff107e1296f9deeac7f0e2b8eda2d44bc2c Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 25 Oct 2022 15:14:00 -0700 Subject: [PATCH 100/162] Cleanup comments Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.cu | 37 ++++--------------------------- 1 file changed, 4 insertions(+), 33 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 9e3112aa478..cc064a43779 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -171,9 +171,6 @@ void generate_depth_remappings(std::map, std::ve } } -/** - * @copydoc cudf::io::detail::parquet::read_column_chunks - */ std::future reader::impl::read_column_chunks( std::vector>& page_data, hostdevice_vector& chunks, // TODO const? @@ -232,9 +229,6 @@ std::future reader::impl::read_column_chunks( return std::async(std::launch::deferred, sync_fn, std::move(read_tasks)); } -/** - * @copydoc cudf::io::detail::parquet::count_page_headers - */ size_t reader::impl::count_page_headers(hostdevice_vector& chunks) { size_t total_pages = 0; @@ -250,9 +244,6 @@ size_t reader::impl::count_page_headers(hostdevice_vector& return total_pages; } -/** - * @copydoc cudf::io::detail::parquet::decode_page_headers - */ void reader::impl::decode_page_headers(hostdevice_vector& chunks, hostdevice_vector& pages) { @@ -269,9 +260,6 @@ void reader::impl::decode_page_headers(hostdevice_vector& pages.device_to_host(_stream, true); } -/** - * @copydoc cudf::io::detail::parquet::decompress_page_data - */ rmm::device_buffer reader::impl::decompress_page_data( hostdevice_vector& chunks, hostdevice_vector& pages) { @@ -443,9 +431,6 @@ rmm::device_buffer reader::impl::decompress_page_data( return decomp_pages; } -/** - * @copydoc cudf::io::detail::parquet::allocate_nesting_info - */ void reader::impl::allocate_nesting_info(hostdevice_vector const& chunks, hostdevice_vector& pages, hostdevice_vector& page_nesting_info) @@ -566,9 +551,6 @@ void reader::impl::allocate_nesting_info(hostdevice_vector page_nesting_info.host_to_device(_stream); } -/** - * @copydoc cudf::io::detail::parquet::decode_page_data - */ void reader::impl::decode_page_data(hostdevice_vector& chunks, hostdevice_vector& pages, hostdevice_vector& page_nesting, @@ -788,18 +770,6 @@ void reader::impl::preprocess_file_and_columns( preprocess_file(skip_rows, num_rows, row_group_list); if (_file_itm_data.has_data) { - // - compute column sizes and allocate output buffers. - // important: - // for nested schemas, we have to do some further preprocessing to determine: - // - real column output sizes per level of nesting (in a flat schema, there's only 1 level - // of - // nesting and it's size is the row count) - // - // - for nested schemas, output buffer offset values per-page, per nesting-level for the - // purposes of decoding. - // TODO: make this a parameter. 
- - // auto const _chunk_read_limit = 0; preprocess_columns(_file_itm_data.chunks, _file_itm_data.pages_info, skip_rows_corrected, @@ -807,7 +777,7 @@ void reader::impl::preprocess_file_and_columns( uses_custom_row_bounds, _chunk_read_limit); - if (_chunk_read_limit == 0) { + if (_chunk_read_limit == 0) { // read the whole file at once CUDF_EXPECTS(_chunk_read_info.size() == 1, "Reading the whole file should yield only one chunk."); } @@ -889,9 +859,10 @@ table_with_metadata reader::impl::read(size_type skip_rows, std::vector> const& row_group_list) { #if defined(ALLOW_PLAIN_READ_CHUNK_LIMIT) - preprocess_file_and_columns(skip_rows, num_rows, uses_custom_row_bounds || _chunk_read_limit > 0, row_group_list); + preprocess_file_and_columns( + skip_rows, num_rows, uses_custom_row_bounds || _chunk_read_limit > 0, row_group_list); return read_chunk_internal(uses_custom_row_bounds || _chunk_read_limit > 0); -#else +#else CUDF_EXPECTS(_chunk_read_limit == 0, "Reading the whole file must not have non-zero byte_limit."); preprocess_file_and_columns(skip_rows, num_rows, uses_custom_row_bounds, row_group_list); return read_chunk_internal(uses_custom_row_bounds); From dc9ef5c38b496450473295011f239365bf731a45 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 25 Oct 2022 16:12:51 -0700 Subject: [PATCH 101/162] Construct output table metadata just once Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.cu | 53 +++++++++++++++++++----------- cpp/src/io/parquet/reader_impl.hpp | 5 +++ 2 files changed, 39 insertions(+), 19 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index cc064a43779..be5b87d323d 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -788,10 +788,11 @@ void reader::impl::preprocess_file_and_columns( table_with_metadata reader::impl::read_chunk_internal(bool uses_custom_row_bounds) { - table_metadata out_metadata; + // If `_output_metadata` has been constructed, just copy it over. + auto out_metadata = _output_metadata ? table_metadata{*_output_metadata} : table_metadata{}; // output cudf columns as determined by the top level schema - std::vector> out_columns; + auto out_columns = std::vector>{}; out_columns.reserve(_output_columns.size()); if (!has_next()) { return finalize_output(out_metadata, out_columns); } @@ -818,11 +819,16 @@ table_with_metadata reader::impl::read_chunk_internal(bool uses_custom_row_bound // create the final output cudf columns for (size_t i = 0; i < _output_columns.size(); ++i) { - column_name_info& col_name = out_metadata.schema_info.emplace_back(""); - auto const metadata = _reader_column_schema.has_value() - ? std::make_optional((*_reader_column_schema)[i]) - : std::nullopt; - out_columns.emplace_back(make_column(_output_columns[i], &col_name, metadata, _stream, _mr)); + auto const metadata = _reader_column_schema.has_value() + ? std::make_optional((*_reader_column_schema)[i]) + : std::nullopt; + // Only construct `out_metadata` if `_output_metadata` has not been cached. 
+ if (!_output_metadata) { + column_name_info& col_name = out_metadata.schema_info.emplace_back(""); + out_columns.emplace_back(make_column(_output_columns[i], &col_name, metadata, _stream, _mr)); + } else { + out_columns.emplace_back(make_column(_output_columns[i], nullptr, metadata, _stream, _mr)); + } } return finalize_output(out_metadata, out_columns); @@ -833,21 +839,30 @@ table_with_metadata reader::impl::finalize_output(table_metadata& out_metadata, { // Create empty columns as needed (this can happen if we've ended up with no actual data to read) for (size_t i = out_columns.size(); i < _output_columns.size(); ++i) { - column_name_info& col_name = out_metadata.schema_info.emplace_back(""); - out_columns.emplace_back(io::detail::empty_like(_output_columns[i], &col_name, _stream, _mr)); + if (!_output_metadata) { + column_name_info& col_name = out_metadata.schema_info.emplace_back(""); + out_columns.emplace_back(io::detail::empty_like(_output_columns[i], &col_name, _stream, _mr)); + } else { + out_columns.emplace_back(io::detail::empty_like(_output_columns[i], nullptr, _stream, _mr)); + } } - // Return column names (must match order of returned columns) - out_metadata.column_names.resize(_output_columns.size()); - for (size_t i = 0; i < _output_column_schemas.size(); i++) { - auto const& schema = _metadata->get_schema(_output_column_schemas[i]); - out_metadata.column_names[i] = schema.name; - } + if (!_output_metadata) { + // Return column names (must match order of returned columns) + out_metadata.column_names.resize(_output_columns.size()); + for (size_t i = 0; i < _output_column_schemas.size(); i++) { + auto const& schema = _metadata->get_schema(_output_column_schemas[i]); + out_metadata.column_names[i] = schema.name; + } - // Return user metadata - out_metadata.per_file_user_data = _metadata->get_key_value_metadata(); - out_metadata.user_data = {out_metadata.per_file_user_data[0].begin(), - out_metadata.per_file_user_data[0].end()}; + // Return user metadata + out_metadata.per_file_user_data = _metadata->get_key_value_metadata(); + out_metadata.user_data = {out_metadata.per_file_user_data[0].begin(), + out_metadata.per_file_user_data[0].end()}; + + // Finally, save the output table metadata into `_output_metadata` for reuse next time. + _output_metadata = std::make_unique(out_metadata); + } return {std::make_unique
(std::move(out_columns)), std::move(out_metadata)}; } diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index 4d5f07844db..6eef645dcae 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -278,11 +278,16 @@ class reader::impl { // input columns to be processed std::vector _input_columns; + // output columns to be generated std::vector _output_columns; + // _output_columns associated schema indices std::vector _output_column_schemas; + // _output_columns associated metadata + std::unique_ptr _output_metadata; + bool _strings_to_categorical = false; std::optional> _reader_column_schema; data_type _timestamp_type{type_id::EMPTY}; From 0366b7a1c92b2e686e4dc06c9084d724dad68ee8 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 25 Oct 2022 21:21:13 -0700 Subject: [PATCH 102/162] Construct `_output_columns` just once Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.cu | 41 ++++++++++++++++++++++-------- cpp/src/io/parquet/reader_impl.hpp | 3 +++ 2 files changed, 34 insertions(+), 10 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index be5b87d323d..247b4021c70 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -34,8 +34,7 @@ namespace cudf::io::detail::parquet { namespace { -inline void decompress_check(device_span results, - rmm::cuda_stream_view stream) +void decompress_check(device_span results, rmm::cuda_stream_view stream) { CUDF_EXPECTS(thrust::all_of(rmm::exec_policy(stream), results.begin(), @@ -46,6 +45,24 @@ inline void decompress_check(device_span results, "Error during decompression"); } +/** + * @brief Recursively copy the output buffer from one to another. + * + * This only copies `name` and `user_data` fields, which are generated during reader construction. + * + * @param buff The old output buffer + * @param new_buff The new output buffer + */ +void copy_output_buffer(column_buffer const& buff, column_buffer& new_buff) +{ + new_buff.name = buff.name; + new_buff.user_data = buff.user_data; + for (auto const& child : buff.children) { + auto& new_child = new_buff.children.emplace_back(column_buffer(child.type, child.is_nullable)); + copy_output_buffer(child, new_child); + } +} + } // namespace /** @@ -756,6 +773,13 @@ reader::impl::impl(std::size_t chunk_read_limit, mr) { _chunk_read_limit = chunk_read_limit; + + // Save the states of the output buffers for reuse in `chunk_read()`. + for (auto const& buff : _output_columns) { + auto& new_buff = + _output_columns_template.emplace_back(column_buffer(buff.type, buff.is_nullable)); + copy_output_buffer(buff, new_buff); + } } void reader::impl::preprocess_file_and_columns( @@ -886,14 +910,11 @@ table_with_metadata reader::impl::read(size_type skip_rows, table_with_metadata reader::impl::read_chunk() { - { - // TODO: this be called once, then _output_columns is saved as a template and copied to the - // output each time. - std::tie(_input_columns, _output_columns, _output_column_schemas) = - _metadata->select_columns(_options.get_columns(), - _options.is_enabled_use_pandas_metadata(), - _strings_to_categorical, - _timestamp_type.id()); + // Reset the output buffers to their original states (right after reader construction). 
+ _output_columns.resize(0); + for (auto const& buff : _output_columns_template) { + auto& new_buff = _output_columns.emplace_back(column_buffer(buff.type, buff.is_nullable)); + copy_output_buffer(buff, new_buff); } preprocess_file_and_columns(0, -1, true, {}); diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index 6eef645dcae..1d69a1d2110 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -282,6 +282,9 @@ class reader::impl { // output columns to be generated std::vector _output_columns; + // data of output columns saved after construction for reuse + std::vector _output_columns_template; + // _output_columns associated schema indices std::vector _output_column_schemas; From ea2fe9c6afb8f67d16deb4881a30a0f37ed84d47 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 26 Oct 2022 06:30:52 -0700 Subject: [PATCH 103/162] Remove `options` member variable Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.cu | 2 +- cpp/src/io/parquet/reader_impl.hpp | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 247b4021c70..434ae3391bc 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -738,7 +738,7 @@ reader::impl::impl(std::vector>&& sources, parquet_reader_options const& options, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) - : _stream(stream), _mr(mr), _sources(std::move(sources)), _options(options) + : _stream(stream), _mr(mr), _sources(std::move(sources)) { // Open and parse the source dataset metadata _metadata = std::make_unique(_sources); diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index 1d69a1d2110..c53c72b3200 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -302,9 +302,6 @@ class reader::impl { std::size_t _chunk_read_limit{0}; std::size_t _current_read_chunk{0}; bool _file_preprocessed{false}; - - // TODO: Remove below - parquet_reader_options const _options; }; } // namespace cudf::io::detail::parquet From 18040569223269d25d6a153ebf5d980b9b7108c2 Mon Sep 17 00:00:00 2001 From: Dave Baranec Date: Wed, 26 Oct 2022 11:00:45 -0500 Subject: [PATCH 104/162] Make the chunked_read_limit a soft limit - if we can't find a split, do the best we can. Fix a potential out of bounds array access. --- cpp/src/io/parquet/reader_impl_preprocess.cu | 77 +++++++++++--------- 1 file changed, 41 insertions(+), 36 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index 6df318bfee6..b2000cee3bc 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -168,6 +168,46 @@ struct row_total_size { } }; +std::vector find_splits(std::vector const& sizes, size_type num_rows, size_t chunked_read_size) +{ + // now we have an array of {row_count, real output bytes}. just walk through it and generate + // splits. + // TODO: come up with a clever way to do this entirely in parallel. 
For now, as long as batch + // sizes are reasonably large, this shouldn't iterate too many times + std::vector splits; + { + size_t cur_pos = 0; + size_t cumulative_size = 0; + size_t cur_row_count = 0; + while (cur_row_count < static_cast(num_rows)) { + auto iter = thrust::make_transform_iterator( + sizes.begin() + cur_pos, + [cumulative_size](cumulative_row_info const& i) { return i.size_bytes - cumulative_size; }); + int64_t p = + (thrust::lower_bound( + thrust::seq, iter, iter + sizes.size(), static_cast(chunked_read_size)) - + iter) + + cur_pos; + if (static_cast(p) >= sizes.size() || (sizes[p].size_bytes - cumulative_size > static_cast(chunked_read_size))){ + p--; + } + + // best-try. if we can't find something that'll fit, we have to go bigger + while((sizes[p].row_count == cur_row_count || p < 0) && p < (static_cast(sizes.size()) - 1)){ + p++; + } + + auto const start_row = cur_row_count; + cur_row_count = sizes[p].row_count; + splits.push_back(gpu::chunk_read_info{start_row, cur_row_count - start_row}); + // printf("Split: {%lu, %lu}\n", splits.back().skip_rows, splits.back().num_rows); + cur_pos = p; + cumulative_size = sizes[p].size_bytes; + } + } + return splits; +} + std::vector compute_splits(hostdevice_vector& pages, gpu::chunk_intermediate_data const& id, size_type num_rows, @@ -290,42 +330,7 @@ std::vector compute_splits(hostdevice_vector splits; - { - size_t cur_pos = 0; - size_t cumulative_size = 0; - size_t cur_row_count = 0; - while (cur_row_count < static_cast(num_rows)) { - auto iter = thrust::make_transform_iterator( - h_adjusted.begin() + cur_pos, - [cumulative_size](cumulative_row_info const& i) { return i.size_bytes - cumulative_size; }); - size_type p = - (thrust::lower_bound( - thrust::seq, iter, iter + h_adjusted.size(), static_cast(chunked_read_size)) - - iter) + - cur_pos; - if (h_adjusted[p].size_bytes - cumulative_size > static_cast(chunked_read_size) || - static_cast(p) == h_adjusted.size()) { - p--; - } - if (h_adjusted[p].row_count == cur_row_count || p < 0) { - CUDF_FAIL("Cannot find read split boundary small enough"); - } - - auto const start_row = cur_row_count; - cur_row_count = h_adjusted[p].row_count; - splits.push_back(gpu::chunk_read_info{start_row, cur_row_count - start_row}); - // printf("Split: {%lu, %lu}\n", splits.back().skip_rows, splits.back().num_rows); - cur_pos = p; - cumulative_size = h_adjusted[p].size_bytes; - } - } - - return splits; + return find_splits(h_adjusted, num_rows, chunked_read_size); } struct get_page_chunk_idx { From 3d6e13bd6572a1fffafc898843b81d457b120260 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 26 Oct 2022 13:02:29 -0700 Subject: [PATCH 105/162] Add tests for structs of lists and lists of structs Signed-off-by: Nghia Truong --- cpp/tests/io/parquet_chunked_reader_test.cpp | 128 ++++++++++++++++++- 1 file changed, 122 insertions(+), 6 deletions(-) diff --git a/cpp/tests/io/parquet_chunked_reader_test.cpp b/cpp/tests/io/parquet_chunked_reader_test.cpp index d9af3776be2..99474f60f50 100644 --- a/cpp/tests/io/parquet_chunked_reader_test.cpp +++ b/cpp/tests/io/parquet_chunked_reader_test.cpp @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -137,7 +138,7 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadSimpleData) { auto constexpr num_rows = 40'000; - auto const do_test = [](std::size_t chunk_read_limit, bool nullable) { + auto const do_test = [num_rows](std::size_t chunk_read_limit, bool nullable) { std::vector> input_columns; auto const value_iter = 
thrust::make_counting_iterator(0); input_columns.emplace_back(int32s_col(value_iter, value_iter + num_rows).release()); @@ -165,7 +166,7 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithString) { auto constexpr num_rows = 60'000; - auto const do_test = [](std::size_t chunk_read_limit, bool nullable) { + auto const do_test = [num_rows](std::size_t chunk_read_limit, bool nullable) { std::vector> input_columns; auto const value_iter = thrust::make_counting_iterator(0); @@ -232,16 +233,18 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithStructs) { auto constexpr num_rows = 100'000; - auto const do_test = [](std::size_t chunk_read_limit, bool nullable) { + auto const do_test = [num_rows](std::size_t chunk_read_limit, bool nullable) { std::vector> input_columns; auto const int_iter = thrust::make_counting_iterator(0); - auto const str_iter = cudf::detail::make_counting_transform_iterator( - 0, [&](int32_t i) { return std::to_string(i); }); input_columns.emplace_back(int32s_col(int_iter, int_iter + num_rows).release()); input_columns.emplace_back([=] { auto child1 = int32s_col(int_iter, int_iter + num_rows); auto child2 = int32s_col(int_iter + num_rows, int_iter + num_rows * 2); + + auto const str_iter = cudf::detail::make_counting_transform_iterator( + 0, [&](int32_t i) { return std::to_string(i); }); auto child3 = strings_col{str_iter, str_iter + num_rows}; + return structs_col{{child1, child2, child3}}.release(); }()); @@ -272,7 +275,7 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithLists) { auto constexpr num_rows = 100'000; - auto const do_test = [](std::size_t chunk_read_limit, bool nullable) { + auto const do_test = [num_rows](std::size_t chunk_read_limit, bool nullable) { std::vector> input_columns; auto const int_iter = thrust::make_counting_iterator(0); input_columns.emplace_back(int32s_col(int_iter, int_iter + num_rows).release()); @@ -307,3 +310,116 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithLists) CUDF_TEST_EXPECT_TABLES_EQUAL(*input, *result); } } + +TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithStructsOfLists) +{ + auto constexpr num_rows = 100'000; + + auto const do_test = [num_rows](std::size_t chunk_read_limit, bool nullable) { + std::vector> input_columns; + auto const int_iter = thrust::make_counting_iterator(0); + input_columns.emplace_back(int32s_col(int_iter, int_iter + num_rows).release()); + input_columns.emplace_back([=] { + std::vector> child_columns; + child_columns.emplace_back(int32s_col(int_iter, int_iter + num_rows).release()); + child_columns.emplace_back( + int32s_col(int_iter + num_rows, int_iter + num_rows * 2).release()); + + auto const str_iter = cudf::detail::make_counting_transform_iterator(0, [&](int32_t i) { + return std::to_string(i) + "++++++++++++++++++++" + std::to_string(i); + }); + child_columns.emplace_back(strings_col{str_iter, str_iter + num_rows}.release()); + + auto const template_lists = int32s_lists_col{ + int32s_lists_col{}, int32s_lists_col{0}, int32s_lists_col{0, 1}, int32s_lists_col{0, 1, 2}}; + auto const gather_iter = + cudf::detail::make_counting_transform_iterator(0, [&](int32_t i) { return i % 4; }); + auto const gather_map = int32s_col(gather_iter, gather_iter + num_rows); + child_columns.emplace_back( + std::move(cudf::gather(cudf::table_view{{template_lists}}, gather_map)->release().front())); + + return structs_col(std::move(child_columns)).release(); + }()); + + auto [input_table, filepath] = write_file(input_columns, + "chunked_read_with_structs_of_lists", + nullable, + 512 * 1024, // 512KB 
per page + 20000 // 20k rows per page + ); + auto [result, num_chunks] = chunked_read(filepath, chunk_read_limit); + return std::tuple{std::move(input_table), std::move(result), num_chunks}; + }; + + { + auto const [input, result, num_chunks] = do_test(500'000, false); + EXPECT_EQ(num_chunks, 10); + CUDF_TEST_EXPECT_TABLES_EQUAL(*input, *result); + } + + { + auto const [input, result, num_chunks] = do_test(500'000, true); + EXPECT_EQ(num_chunks, 5); + CUDF_TEST_EXPECT_TABLES_EQUAL(*input, *result); + } +} + +TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithListsOfStructs) +{ + auto constexpr num_rows = 100'000; + + auto const do_test = [num_rows](std::size_t chunk_read_limit, bool nullable) { + std::vector> input_columns; + auto const int_iter = thrust::make_counting_iterator(0); + input_columns.emplace_back(int32s_col(int_iter, int_iter + num_rows).release()); + + auto offsets = std::vector{}; + offsets.reserve(num_rows * 2); + cudf::size_type num_structs = 0; + for (int i = 0; i < num_rows; ++i) { + offsets.push_back(num_structs); + auto const new_list_size = i % 4; + num_structs += new_list_size; + } + offsets.push_back(num_structs); + + auto const make_structs_col = [=] { + auto child1 = int32s_col(int_iter, int_iter + num_structs); + auto child2 = int32s_col(int_iter + num_structs, int_iter + num_structs * 2); + + auto const str_iter = cudf::detail::make_counting_transform_iterator( + 0, [&](int32_t i) { return std::to_string(i) + std::to_string(i) + std::to_string(i); }); + auto child3 = strings_col{str_iter, str_iter + num_structs}; + + return structs_col{{child1, child2, child3}}.release(); + }; + + input_columns.emplace_back( + cudf::make_lists_column(static_cast(offsets.size() - 1), + int32s_col(offsets.begin(), offsets.end()).release(), + make_structs_col(), + 0, + rmm::device_buffer{})); + + auto [input_table, filepath] = write_file(input_columns, + "chunked_read_with_lists_of_structs", + nullable, + 512 * 1024, // 512KB per page + 20000 // 20k rows per page + ); + auto [result, num_chunks] = chunked_read(filepath, chunk_read_limit); + return std::tuple{std::move(input_table), std::move(result), num_chunks}; + }; + + { + auto const [input, result, num_chunks] = do_test(1'000'000, false); + EXPECT_EQ(num_chunks, 7); + CUDF_TEST_EXPECT_TABLES_EQUAL(*input, *result); + } + + { + auto const [input, result, num_chunks] = do_test(1'000'000, true); + EXPECT_EQ(num_chunks, 5); + CUDF_TEST_EXPECT_TABLES_EQUAL(*input, *result); + } +} From 826c46f3c846edd694756855bfabb1e7d0bb6d8a Mon Sep 17 00:00:00 2001 From: Dave Baranec Date: Wed, 26 Oct 2022 17:17:44 -0500 Subject: [PATCH 106/162] Fixed an issue in split generation code causing indexing off the end of the input array. Added some specific page boundary check tests. 
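For orientation (an editor's sketch, not part of this patch): the split search below walks a
list of cumulative {row_count, size_bytes} entries, takes the largest prefix that still fits
the soft limit, and always advances by at least one entry so a single oversized page cannot
stall the loop. A simplified, host-only version of that idea, with hypothetical names and the
assumption of one strictly increasing cumulative entry per page, could look like:

  #include <cstddef>
  #include <utility>
  #include <vector>

  struct page_size_info {    // illustrative stand-in for cumulative_row_info
    std::size_t row_count;   // cumulative rows up to and including this page
    std::size_t size_bytes;  // cumulative decoded bytes up to and including this page
  };

  // returns {skip_rows, num_rows} per chunk
  std::vector<std::pair<std::size_t, std::size_t>> find_splits_sketch(
    std::vector<page_size_info> const& pages, std::size_t limit)
  {
    std::vector<std::pair<std::size_t, std::size_t>> splits;
    std::size_t start_row = 0, start_bytes = 0, i = 0;
    while (i < pages.size()) {
      std::size_t end = i;  // soft limit: always take at least one more page
      while (end + 1 < pages.size() && pages[end + 1].size_bytes - start_bytes <= limit) {
        ++end;
      }
      splits.emplace_back(start_row, pages[end].row_count - start_row);
      start_row   = pages[end].row_count;
      start_bytes = pages[end].size_bytes;
      i           = end + 1;
    }
    return splits;
  }

With the 40,000-row test file added below (two pages of 20,000 int32 rows, about 80,000 bytes
of decoded data each), any limit under 160,000 bytes splits after the first page (2 chunks),
while a limit of 160,000 bytes or more reads both pages in a single chunk, which is what the
boundary tests expect.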
--- cpp/src/io/parquet/reader_impl_preprocess.cu | 40 ++++++------- cpp/tests/io/parquet_chunked_reader_test.cpp | 59 ++++++++++++++++++++ 2 files changed, 80 insertions(+), 19 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index b2000cee3bc..65b8ba0abd1 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -176,33 +176,35 @@ std::vector find_splits(std::vector c // sizes are reasonably large, this shouldn't iterate too many times std::vector splits; { - size_t cur_pos = 0; - size_t cumulative_size = 0; - size_t cur_row_count = 0; - while (cur_row_count < static_cast(num_rows)) { - auto iter = thrust::make_transform_iterator( - sizes.begin() + cur_pos, - [cumulative_size](cumulative_row_info const& i) { return i.size_bytes - cumulative_size; }); - int64_t p = - (thrust::lower_bound( - thrust::seq, iter, iter + sizes.size(), static_cast(chunked_read_size)) - - iter) + - cur_pos; - if (static_cast(p) >= sizes.size() || (sizes[p].size_bytes - cumulative_size > static_cast(chunked_read_size))){ + size_t cur_pos = 0; + size_t cur_cumulative_size = 0; + size_t cur_row_count = 0; + auto start = thrust::make_transform_iterator(sizes.begin(), [&](cumulative_row_info const& i) { return i.size_bytes - cur_cumulative_size; }); + auto end = start + sizes.size(); + while (cur_row_count < static_cast(num_rows)) { + int64_t p = thrust::lower_bound(thrust::seq, start + cur_pos, end, static_cast(chunked_read_size)) - start; + + // if we're past the end, or if the returned bucket is > than the chunked_read_size, move back one. + if (static_cast(p) >= sizes.size() || + (sizes[p].size_bytes - cur_cumulative_size > static_cast(chunked_read_size)) ){ p--; } - - // best-try. if we can't find something that'll fit, we have to go bigger - while((sizes[p].row_count == cur_row_count || p < 0) && p < (static_cast(sizes.size()) - 1)){ + + // best-try. if we can't find something that'll fit, we have to go bigger. we're doing this in a loop + // because all of the cumulative sizes for all the pages are sorted into one big list. so if we had + // two columns, both of which had an entry {1000, 10000}, that entry would be in the list twice. so we have + // to iterate until we skip past all of them. The idea is that we either do this, or we have to call + // unique() on the input first. + while(p < (static_cast(sizes.size()) - 1) && (sizes[p].row_count == cur_row_count || p < 0)){ p++; - } + } auto const start_row = cur_row_count; cur_row_count = sizes[p].row_count; splits.push_back(gpu::chunk_read_info{start_row, cur_row_count - start_row}); - // printf("Split: {%lu, %lu}\n", splits.back().skip_rows, splits.back().num_rows); + //printf("Split: {%lu, %lu}\n", splits.back().skip_rows, splits.back().num_rows); cur_pos = p; - cumulative_size = sizes[p].size_bytes; + cur_cumulative_size = sizes[p].size_bytes; } } return splits; diff --git a/cpp/tests/io/parquet_chunked_reader_test.cpp b/cpp/tests/io/parquet_chunked_reader_test.cpp index 1867b84d8c4..38af3ec0ce7 100644 --- a/cpp/tests/io/parquet_chunked_reader_test.cpp +++ b/cpp/tests/io/parquet_chunked_reader_test.cpp @@ -157,6 +157,65 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadSimpleData) } } +TEST_F(ParquetChunkedReaderTest, TestChunkedReadBoundaryCases) +{ + // tests some specific boundary conditions in the split calculations. 
+ + auto constexpr num_rows = 40'000; + + auto const do_test = [](std::size_t chunk_read_limit) { + std::vector> input_columns; + auto const value_iter = thrust::make_counting_iterator(0); + input_columns.emplace_back(int32s_col(value_iter, value_iter + num_rows).release()); + + auto [input_table, filepath] = write_file(input_columns, "chunked_read_simple_boundary", false); + auto [result, num_chunks] = chunked_read(filepath, chunk_read_limit); + return std::tuple{std::move(input_table), std::move(result), num_chunks}; + }; + + // test with a limit slightly less than one page of data + { + auto [expected, result, num_chunks] = do_test(79'000); + EXPECT_EQ(num_chunks, 2); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + // test with a limit exactly the size one page of data + { + auto [expected, result, num_chunks] = do_test(80'000); + EXPECT_EQ(num_chunks, 2); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + // test with a limit slightly more the size one page of data + { + auto [expected, result, num_chunks] = do_test(81'000); + EXPECT_EQ(num_chunks, 2); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + // test with a limit slightly less than two pages of data + { + auto [expected, result, num_chunks] = do_test(159'000); + EXPECT_EQ(num_chunks, 2); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + // test with a limit exactly the size of two pages of data + { + auto [expected, result, num_chunks] = do_test(160'000); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + // test with a limit slightly more the size two pages of data + { + auto [expected, result, num_chunks] = do_test(161'000); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } +} + TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithString) { auto constexpr num_rows = 60'000; From 88ca034c3aaa4895abf3766ab3f303b2e8486501 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 26 Oct 2022 15:20:36 -0700 Subject: [PATCH 107/162] Just reformat Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl_preprocess.cu | 52 +++++++++++--------- 1 file changed, 30 insertions(+), 22 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index 65b8ba0abd1..65962aee473 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -168,7 +168,9 @@ struct row_total_size { } }; -std::vector find_splits(std::vector const& sizes, size_type num_rows, size_t chunked_read_size) +std::vector find_splits(std::vector const& sizes, + size_type num_rows, + size_t chunked_read_size) { // now we have an array of {row_count, real output bytes}. just walk through it and generate // splits. @@ -176,34 +178,40 @@ std::vector find_splits(std::vector c // sizes are reasonably large, this shouldn't iterate too many times std::vector splits; { - size_t cur_pos = 0; - size_t cur_cumulative_size = 0; - size_t cur_row_count = 0; - auto start = thrust::make_transform_iterator(sizes.begin(), [&](cumulative_row_info const& i) { return i.size_bytes - cur_cumulative_size; }); - auto end = start + sizes.size(); - while (cur_row_count < static_cast(num_rows)) { - int64_t p = thrust::lower_bound(thrust::seq, start + cur_pos, end, static_cast(chunked_read_size)) - start; - - // if we're past the end, or if the returned bucket is > than the chunked_read_size, move back one. 
- if (static_cast(p) >= sizes.size() || - (sizes[p].size_bytes - cur_cumulative_size > static_cast(chunked_read_size)) ){ + size_t cur_pos = 0; + size_t cur_cumulative_size = 0; + size_t cur_row_count = 0; + auto start = thrust::make_transform_iterator(sizes.begin(), [&](cumulative_row_info const& i) { + return i.size_bytes - cur_cumulative_size; + }); + auto end = start + sizes.size(); + while (cur_row_count < static_cast(num_rows)) { + int64_t p = thrust::lower_bound( + thrust::seq, start + cur_pos, end, static_cast(chunked_read_size)) - + start; + + // if we're past the end, or if the returned bucket is > than the chunked_read_size, move back + // one. + if (static_cast(p) >= sizes.size() || + (sizes[p].size_bytes - cur_cumulative_size > static_cast(chunked_read_size))) { p--; } - - // best-try. if we can't find something that'll fit, we have to go bigger. we're doing this in a loop - // because all of the cumulative sizes for all the pages are sorted into one big list. so if we had - // two columns, both of which had an entry {1000, 10000}, that entry would be in the list twice. so we have - // to iterate until we skip past all of them. The idea is that we either do this, or we have to call - // unique() on the input first. - while(p < (static_cast(sizes.size()) - 1) && (sizes[p].row_count == cur_row_count || p < 0)){ + + // best-try. if we can't find something that'll fit, we have to go bigger. we're doing this in + // a loop because all of the cumulative sizes for all the pages are sorted into one big list. + // so if we had two columns, both of which had an entry {1000, 10000}, that entry would be in + // the list twice. so we have to iterate until we skip past all of them. The idea is that we + // either do this, or we have to call unique() on the input first. + while (p < (static_cast(sizes.size()) - 1) && + (sizes[p].row_count == cur_row_count || p < 0)) { p++; } auto const start_row = cur_row_count; cur_row_count = sizes[p].row_count; splits.push_back(gpu::chunk_read_info{start_row, cur_row_count - start_row}); - //printf("Split: {%lu, %lu}\n", splits.back().skip_rows, splits.back().num_rows); - cur_pos = p; + // printf("Split: {%lu, %lu}\n", splits.back().skip_rows, splits.back().num_rows); + cur_pos = p; cur_cumulative_size = sizes[p].size_bytes; } } @@ -230,7 +238,7 @@ std::vector compute_splits(hostdevice_vector Date: Wed, 26 Oct 2022 15:29:23 -0700 Subject: [PATCH 108/162] Change variable names in tests Signed-off-by: Nghia Truong --- cpp/tests/io/parquet_chunked_reader_test.cpp | 82 ++++++++++---------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/cpp/tests/io/parquet_chunked_reader_test.cpp b/cpp/tests/io/parquet_chunked_reader_test.cpp index 2f5b0155809..a54a7e72a82 100644 --- a/cpp/tests/io/parquet_chunked_reader_test.cpp +++ b/cpp/tests/io/parquet_chunked_reader_test.cpp @@ -66,13 +66,13 @@ auto write_file(std::vector>& input_columns, std::size_t max_page_size_bytes = cudf::io::default_max_page_size_bytes, std::size_t max_page_size_rows = cudf::io::default_max_page_size_rows) { - // Generate deterministic bitmask instead of random bitmask for easy verification. - auto const valid_iter = cudf::detail::make_counting_transform_iterator( - 0, [&](int32_t i) -> bool { return static_cast(i % 2); }); - // Just shift nulls of the next column by one position to avoid having all nulls in the same // table rows. if (nullable) { + // Generate deterministic bitmask instead of random bitmask for easy verification. 
+ auto const valid_iter = cudf::detail::make_counting_transform_iterator( + 0, [&](int32_t i) -> bool { return static_cast(i % 2); }); + cudf::size_type offset{0}; for (auto& col : input_columns) { col->set_null_mask( @@ -150,15 +150,15 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadSimpleData) }; { - auto const [input, result, num_chunks] = do_test(240'000, false); + auto const [expected, result, num_chunks] = do_test(240'000, false); EXPECT_EQ(num_chunks, 2); - CUDF_TEST_EXPECT_TABLES_EQUAL(*input, *result); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } { - auto const [input, result, num_chunks] = do_test(240'000, true); + auto const [expected, result, num_chunks] = do_test(240'000, true); EXPECT_EQ(num_chunks, 2); - CUDF_TEST_EXPECT_TABLES_EQUAL(*input, *result); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } } @@ -177,48 +177,48 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadBoundaryCases) auto [result, num_chunks] = chunked_read(filepath, chunk_read_limit); return std::tuple{std::move(input_table), std::move(result), num_chunks}; }; - + // test with a limit slightly less than one page of data { - auto [expected, result, num_chunks] = do_test(79'000); + auto const [expected, result, num_chunks] = do_test(79'000); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // test with a limit exactly the size one page of data { - auto [expected, result, num_chunks] = do_test(80'000); + auto const [expected, result, num_chunks] = do_test(80'000); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // test with a limit slightly more the size one page of data { - auto [expected, result, num_chunks] = do_test(81'000); + auto const [expected, result, num_chunks] = do_test(81'000); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } - // test with a limit slightly less than two pages of data + // test with a limit slightly less than two pages of data { - auto [expected, result, num_chunks] = do_test(159'000); + auto const [expected, result, num_chunks] = do_test(159'000); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // test with a limit exactly the size of two pages of data { - auto [expected, result, num_chunks] = do_test(160'000); + auto const [expected, result, num_chunks] = do_test(160'000); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // test with a limit slightly more the size two pages of data { - auto [expected, result, num_chunks] = do_test(161'000); + auto const [expected, result, num_chunks] = do_test(161'000); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); - } + } } TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithString) @@ -266,25 +266,25 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithString) }; { - auto const [input, result, num_chunks] = do_test(500'000, false); + auto const [expected, result, num_chunks] = do_test(500'000, false); EXPECT_EQ(num_chunks, 2); - CUDF_TEST_EXPECT_TABLES_EQUAL(*input, *result); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } { - auto const [input, result, num_chunks] = do_test(500'000, true); + auto const [expected, result, num_chunks] = do_test(500'000, true); EXPECT_EQ(num_chunks, 2); - CUDF_TEST_EXPECT_TABLES_EQUAL(*input, *result); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } { - auto const [input, result, num_chunks] = do_test(1'000'000, false); + auto const [expected, result, num_chunks] = do_test(1'000'000, false); 
EXPECT_EQ(num_chunks, 1); - CUDF_TEST_EXPECT_TABLES_EQUAL(*input, *result); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } { - auto const [input, result, num_chunks] = do_test(1'000'000, true); + auto const [expected, result, num_chunks] = do_test(1'000'000, true); EXPECT_EQ(num_chunks, 1); - CUDF_TEST_EXPECT_TABLES_EQUAL(*input, *result); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } } @@ -318,15 +318,15 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithStructs) }; { - auto const [input, result, num_chunks] = do_test(500'000, false); + auto const [expected, result, num_chunks] = do_test(500'000, false); EXPECT_EQ(num_chunks, 5); - CUDF_TEST_EXPECT_TABLES_EQUAL(*input, *result); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } { - auto const [input, result, num_chunks] = do_test(500'000, true); + auto const [expected, result, num_chunks] = do_test(500'000, true); EXPECT_EQ(num_chunks, 5); - CUDF_TEST_EXPECT_TABLES_EQUAL(*input, *result); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } } @@ -358,15 +358,15 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithLists) }; { - auto const [input, result, num_chunks] = do_test(400'000, false); + auto const [expected, result, num_chunks] = do_test(400'000, false); EXPECT_EQ(num_chunks, 3); - CUDF_TEST_EXPECT_TABLES_EQUAL(*input, *result); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } { - auto const [input, result, num_chunks] = do_test(400'000, true); + auto const [expected, result, num_chunks] = do_test(400'000, true); EXPECT_EQ(num_chunks, 3); - CUDF_TEST_EXPECT_TABLES_EQUAL(*input, *result); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } } @@ -411,15 +411,15 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithStructsOfLists) }; { - auto const [input, result, num_chunks] = do_test(500'000, false); + auto const [expected, result, num_chunks] = do_test(500'000, false); EXPECT_EQ(num_chunks, 10); - CUDF_TEST_EXPECT_TABLES_EQUAL(*input, *result); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } { - auto const [input, result, num_chunks] = do_test(500'000, true); + auto const [expected, result, num_chunks] = do_test(500'000, true); EXPECT_EQ(num_chunks, 5); - CUDF_TEST_EXPECT_TABLES_EQUAL(*input, *result); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } } @@ -471,14 +471,14 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithListsOfStructs) }; { - auto const [input, result, num_chunks] = do_test(1'000'000, false); + auto const [expected, result, num_chunks] = do_test(1'000'000, false); EXPECT_EQ(num_chunks, 7); - CUDF_TEST_EXPECT_TABLES_EQUAL(*input, *result); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } { - auto const [input, result, num_chunks] = do_test(1'000'000, true); + auto const [expected, result, num_chunks] = do_test(1'000'000, true); EXPECT_EQ(num_chunks, 5); - CUDF_TEST_EXPECT_TABLES_EQUAL(*input, *result); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } } From b8063231cf62385e59b9225105767d9c5a855930 Mon Sep 17 00:00:00 2001 From: Dave Baranec Date: Thu, 27 Oct 2022 14:26:18 -0500 Subject: [PATCH 109/162] Optimization: store off global nesting sizes per page so that during trim passes we only have to fully process pages that are on the edges of the skip_rows/num_rows boundary. 
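Roughly, the change keeps two row counts per page and nesting level: `size`, computed once in
the base preprocessing pass as the page's contribution if fully decoded, and `batch_size`, its
contribution to the current skip_rows/num_rows batch. Only pages that straddle an edge of the
row window still need the expensive repetition/definition level walk; the others either reuse
`size` or contribute nothing. A minimal sketch of that classification (illustrative names, not
the kernel code):

  // size       : rows contributed at this nesting level if the page is fully decoded
  // batch_size : rows contributed to the current skip_rows/num_rows batch
  enum class trim_action { use_full_size, use_zero, recompute };

  trim_action classify_page(std::size_t page_start, std::size_t page_rows,
                            std::size_t min_row, std::size_t num_rows)
  {
    std::size_t const window_end = min_row + num_rows;
    std::size_t const page_end   = page_start + page_rows;
    if (page_end <= min_row || page_start >= window_end) { return trim_action::use_zero; }
    if (min_row <= page_start && page_end <= window_end) { return trim_action::use_full_size; }
    return trim_action::recompute;  // page crosses a window edge: re-walk the level data
  }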
--- cpp/src/io/parquet/page_data.cu | 220 +++++++++++-------- cpp/src/io/parquet/parquet_gpu.hpp | 8 +- cpp/src/io/parquet/reader_impl_preprocess.cu | 13 +- cpp/tests/io/parquet_chunked_reader_test.cpp | 6 +- 4 files changed, 141 insertions(+), 106 deletions(-) diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index 9c25fe1e7a1..f0409859486 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -287,13 +287,15 @@ __device__ void gpuDecodeStream( * * @return The new output position */ -template -__device__ std::pair gpuDecodeDictionaryIndices(volatile page_state_s* s, int target_pos, int t) +template +__device__ std::pair gpuDecodeDictionaryIndices(volatile page_state_s* s, + int target_pos, + int t) { const uint8_t* end = s->data_end; int dict_bits = s->dict_bits; int pos = s->dict_pos; - int str_len = 0; + int str_len = 0; while (pos < target_pos) { int is_literal, batch_len; @@ -338,7 +340,7 @@ __device__ std::pair gpuDecodeDictionaryIndices(volatile page_state_s* __syncwarp(); is_literal = shuffle(is_literal); batch_len = shuffle(batch_len); - + int len = 0; if (t < batch_len) { int dict_idx = s->dict_val; @@ -361,20 +363,18 @@ __device__ std::pair gpuDecodeDictionaryIndices(volatile page_state_s* dict_idx &= (1 << dict_bits) - 1; } } - + // if we're computing indices, store it off. - if constexpr(sizes_only){ - len = [&](){ - // we may end up decoding more indices than we asked for. so don't include those in the size - // calculation - if(pos + t >= target_pos){ - return 0; - } + if constexpr (sizes_only) { + len = [&]() { + // we may end up decoding more indices than we asked for. so don't include those in the + // size calculation + if (pos + t >= target_pos) { return 0; } // TODO: refactor this with gpuGetStringData / gpuGetStringSize uint32_t const dict_pos = (s->dict_bits > 0) ? dict_idx * sizeof(string_index_pair) : 0; if (target_pos && dict_pos < (uint32_t)s->dict_size) { const auto* src = reinterpret_cast(s->dict_base + dict_pos); - return src->second; + return src->second; } return 0; }(); @@ -384,7 +384,7 @@ __device__ std::pair gpuDecodeDictionaryIndices(volatile page_state_s* } // if we're computing sizes, sum it - if constexpr(sizes_only){ + if constexpr (sizes_only) { typedef cub::WarpReduce WarpReduce; __shared__ typename WarpReduce::TempStorage temp_storage; // note: str_len will only be valid on thread 0. @@ -469,9 +469,9 @@ __device__ int gpuDecodeRleBooleans(volatile page_state_s* s, int target_pos, in * @return The new output position */ __device__ size_type gpuInitStringDescriptors(volatile page_state_s* s, int target_pos, int t) -{ - int pos = s->dict_pos; - int total_len = 0; +{ + int pos = s->dict_pos; + int total_len = 0; // This step is purely serial if (!t) { @@ -983,22 +983,24 @@ static __device__ bool setupLocalPageInfo(page_state_s* const s, s->dtype_len = 8; // Convert to 64-bit timestamp } - // NOTE: s->page.num_rows, s->col.chunk_row, s->first_row and s->num_rows will be + // NOTE: s->page.num_rows, s->col.chunk_row, s->first_row and s->num_rows will be // invalid/bogus during first pass of the preprocess step for nested types. this is ok // because we ignore these values in that stage. 
- { + { auto const max_row = min_row + num_rows; - + // if we are totally outside the range of the input, do nothing - if((page_start_row > max_row) || (page_start_row + s->page.num_rows < min_row)){ + if ((page_start_row > max_row) || (page_start_row + s->page.num_rows < min_row)) { s->first_row = 0; - s->num_rows = 0; + s->num_rows = 0; } // otherwise else { - s->first_row = page_start_row >= min_row ? 0 : min_row - page_start_row; + s->first_row = page_start_row >= min_row ? 0 : min_row - page_start_row; auto const max_page_rows = s->page.num_rows - s->first_row; - s->num_rows = (page_start_row + s->first_row) + max_page_rows <= max_row ? max_page_rows : max_row - (page_start_row + s->first_row); + s->num_rows = (page_start_row + s->first_row) + max_page_rows <= max_row + ? max_page_rows + : max_row - (page_start_row + s->first_row); } } @@ -1136,9 +1138,10 @@ static __device__ bool setupLocalPageInfo(page_state_s* const s, if (decode_step) { s->input_value_count = s->page.skipped_values > -1 ? s->page.skipped_values : 0; } else { - s->input_value_count = 0; - s->input_leaf_count = 0; - s->page.skipped_values = -1; // magic number to indicate it hasn't been set for use inside UpdatePageSizes + s->input_value_count = 0; + s->input_leaf_count = 0; + s->page.skipped_values = + -1; // magic number to indicate it hasn't been set for use inside UpdatePageSizes s->page.skipped_leaf_values = 0; } } @@ -1473,9 +1476,6 @@ static __device__ void gpuUpdatePageSizes(page_state_s* s, int input_leaf_count = s->input_leaf_count; // how many rows we've processed in the page so far int input_row_count = s->input_row_count; - - // how many valid leaves we've processed - // int input_leaf_valid_count = s->input_leaf_valid_count; while (input_value_count < target_input_value_count) { int start_depth, end_depth, d; @@ -1490,7 +1490,7 @@ static __device__ void gpuUpdatePageSizes(page_state_s* s, // is this thread within row bounds? on the first pass we don't know the bounds, so we will be // computing the full size of the column. on the second pass, we will know our actual row // bounds, so the computation will cap sizes properly. - int in_row_bounds = 1; + int in_row_bounds = 1; if (bounds_set) { // absolute row index int32_t thread_row_index = @@ -1522,9 +1522,7 @@ static __device__ void gpuUpdatePageSizes(page_state_s* s, int const in_nesting_bounds = (s_idx >= start_depth && s_idx <= end_depth && in_row_bounds) ? 
1 : 0; uint32_t const count_mask = ballot(in_nesting_bounds); - if (!t) { - pni->size += __popc(count_mask); - } + if (!t) { pni->batch_size += __popc(count_mask); } /* if (s_idx == max_depth - 1) { @@ -1544,24 +1542,23 @@ static __device__ void gpuUpdatePageSizes(page_state_s* s, if (!t) { s->input_value_count = target_input_value_count; s->input_leaf_count = input_leaf_count; - // s->input_leaf_valid_count = input_leaf_valid_count; s->input_row_count = input_row_count; } } __device__ size_type gpuGetStringSizes(page_state_s* s, int target_count, int t) -{ - auto dict_target_pos = target_count; - size_type str_len = 0; +{ + auto dict_target_pos = target_count; + size_type str_len = 0; if (s->dict_base) { - auto const[new_target_pos, len] = gpuDecodeDictionaryIndices(s, target_count, t); - dict_target_pos = new_target_pos; - str_len = len; + auto const [new_target_pos, len] = gpuDecodeDictionaryIndices(s, target_count, t); + dict_target_pos = new_target_pos; + str_len = len; } else if ((s->col.data_type & 7) == BYTE_ARRAY) { - str_len = gpuInitStringDescriptors(s, target_count, t); + str_len = gpuInitStringDescriptors(s, target_count, t); } if (!t) { *(volatile int32_t*)&s->dict_pos = dict_target_pos; } - return str_len; + return str_len; } /** @@ -1582,46 +1579,17 @@ __global__ void __launch_bounds__(block_size) device_span chunks, size_t min_row, size_t num_rows, - bool compute_num_rows_pass, + bool base_pass, bool compute_string_sizes) { __shared__ __align__(16) page_state_s state_g; page_state_s* const s = &state_g; - int page_idx = blockIdx.x; + int page_idx = blockIdx.x; int t = threadIdx.x; PageInfo* pp = &pages[page_idx]; - if (!setupLocalPageInfo(s, pp, chunks, min_row, num_rows, false)) { - return; - } - - // we only need to preprocess hierarchies with repetition in them (ie, hierarchies - // containing lists anywhere within). - bool const has_repetition = chunks[pp->chunk_idx].max_level[level_type::REPETITION] > 0; - compute_string_sizes = compute_string_sizes && ((s->col.data_type & 7) == BYTE_ARRAY && s->dtype_len != 4); - - // reasons we might want to early out: - // - if this is a flat hierarchy (no lists) and is not a string column. in this case we don't need to do - // the expensive work of traversing the level data to determine sizes. we can just compute it directly. - // - if this is the trim pass and we have no rows to output for this page. - if (!has_repetition && !compute_string_sizes) { - if (!t) { - // note: doing this for all nesting levels because we can still have structs even if we don't - // have lists. - for (size_type idx = 0; idx < pp->num_nesting_levels; idx++) { - pp->nesting[idx].size = pp->num_input_values; - } - } - return; - } - - // zero sizes - int d = 0; - while (d < s->page.num_nesting_levels) { - if (d + t < s->page.num_nesting_levels) { s->page.nesting[d + t].size = 0; } - d += blockDim.x; - } + if (!setupLocalPageInfo(s, pp, chunks, min_row, num_rows, false)) { return; } if (!t) { s->page.skipped_values = -1; s->page.skipped_leaf_values = 0; @@ -1629,17 +1597,71 @@ __global__ void __launch_bounds__(block_size) s->input_row_count = 0; s->input_value_count = 0; - // if we're computing the number of rows, make sure we visit absolutely everything - if (compute_num_rows_pass) { + // in the base pass, we're computing the number of rows, make sure we visit absolutely + // everything + if (base_pass) { s->first_row = 0; s->num_rows = INT_MAX; s->row_index_lower_bound = -1; } } - // if we have no work to do for this page. 
- if(!compute_num_rows_pass && s->num_rows == 0){ + + // we only need to preprocess hierarchies with repetition in them (ie, hierarchies + // containing lists anywhere within). + bool const has_repetition = chunks[pp->chunk_idx].max_level[level_type::REPETITION] > 0; + compute_string_sizes = + compute_string_sizes && ((s->col.data_type & 7) == BYTE_ARRAY && s->dtype_len != 4); + + // various early out optimizations: + + // - if this is a flat hierarchy (no lists) and is not a string column. in this case we don't need + // to do + // the expensive work of traversing the level data to determine sizes. we can just compute it + // directly. + if (!has_repetition && !compute_string_sizes) { + int d = 0; + while (d < s->page.num_nesting_levels) { + auto const i = d + t; + if (i < s->page.num_nesting_levels) { + if (base_pass) { pp->nesting[i].size = pp->num_input_values; } + pp->nesting[i].batch_size = pp->num_input_values; + } + d += blockDim.x; + } + return; + } + + // - if this page is not at the beginning or end of the trim bounds, the batch size is + // the full page size + if (!base_pass && s->num_rows == s->page.num_rows) { + int d = 0; + while (d < s->page.num_nesting_levels) { + auto const i = d + t; + if (i < s->page.num_nesting_levels) { pp->nesting[i].batch_size = pp->nesting[i].size; } + d += blockDim.x; + } return; } + + // - if this page is completely trimmed, zero out sizes. + if (!base_pass && s->num_rows == 0) { + int d = 0; + while (d < s->page.num_nesting_levels) { + auto const i = d + t; + if (i < s->page.num_nesting_levels) { pp->nesting[i].batch_size = 0; } + d += blockDim.x; + } + return; + } + + // at this point we are going to be fully recomputing batch information + + // zero sizes + int d = 0; + while (d < s->page.num_nesting_levels) { + if (d + t < s->page.num_nesting_levels) { s->page.nesting[d + t].batch_size = 0; } + d += blockDim.x; + } __syncthreads(); // optimization : it might be useful to have a version of gpuDecodeStream that could go wider than @@ -1662,29 +1684,40 @@ __global__ void __launch_bounds__(block_size) int actual_input_count = has_repetition ? 
min(s->lvl_count[level_type::REPETITION], s->lvl_count[level_type::DEFINITION]) : s->lvl_count[level_type::DEFINITION]; - + // process what we got back - gpuUpdatePageSizes(s, actual_input_count, t, !compute_num_rows_pass); + gpuUpdatePageSizes(s, actual_input_count, t, !base_pass); if (compute_string_sizes) { auto const str_len = gpuGetStringSizes(s, s->input_leaf_count, t); - if(!t){ - s->page.str_bytes += str_len; - } + if (!t) { s->page.str_bytes += str_len; } } - target_input_count = actual_input_count + batch_size; + target_input_count = actual_input_count + batch_size; __syncwarp(); } } - // update # rows in the actual page - if (!t) { - if(compute_num_rows_pass){ - pp->num_rows = s->page.nesting[0].size; + + // update output results: + // - real number of rows for the whole page + // - nesting sizes for the whole page + // - skipped value information for trimmed pages + // - string bytes + if (base_pass) { + // nesting level 0 is the root column, so the size is also the # of rows + if (!t) { pp->num_rows = s->page.nesting[0].batch_size; } + + // store off this batch size as the "full" size + int d = 0; + while (d < s->page.num_nesting_levels) { + auto const i = d + t; + if (i < s->page.num_nesting_levels) { pp->nesting[i].size = pp->nesting[i].batch_size; } + d += blockDim.x; } + } + if (!t) { pp->skipped_values = s->page.skipped_values; pp->skipped_leaf_values = s->page.skipped_leaf_values; pp->str_bytes = s->page.str_bytes; - // printf("STR BYTES: %d\n", s->page.str_bytes); } } @@ -1714,9 +1747,7 @@ __global__ void __launch_bounds__(block_size) gpuDecodePageData( if (!setupLocalPageInfo(s, &pages[page_idx], chunks, min_row, num_rows, true)) { return; } // if we have no rows to do (eg, in a skip_rows/num_rows case) - if(s->num_rows == 0){ - return; - } + if (s->num_rows == 0) { return; } if (s->dict_base) { out_thread0 = (s->dict_bits > 0) ? 64 : 32; @@ -1753,7 +1784,8 @@ __global__ void __launch_bounds__(block_size) gpuDecodePageData( // WARP1: Decode dictionary indices, booleans or string positions if (s->dict_base) { - auto const[new_target_pos, _] = gpuDecodeDictionaryIndices(s, src_target_pos, t & 0x1f); + auto const [new_target_pos, _] = + gpuDecodeDictionaryIndices(s, src_target_pos, t & 0x1f); src_target_pos = new_target_pos; } else if ((s->col.data_type & 7) == BOOLEAN) { src_target_pos = gpuDecodeRleBooleans(s, src_target_pos, t & 0x1f); diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index ab4f873da78..d344ad8e094 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -109,7 +109,9 @@ struct PageNestingInfo { int32_t max_rep_level; // set during preprocessing - int32_t size; // this page/nesting-level's row count contribution to the output column + int32_t size; // this page/nesting-level's row count contribution to the output column, if fully + // decoded + int32_t batch_size; // the size of the page for this batch int32_t page_start_value; // absolute output start index in output column data // set during data decoding @@ -164,7 +166,9 @@ struct PageInfo { int skipped_values; // # of values skipped in the actual data stream. int skipped_leaf_values; - int32_t str_bytes; // for string columns only, the size in for all the chars in the string + // for string columns only, the size of all the chars in the string for + // this page. 
only valid/computed during the base preprocess pass + int32_t str_bytes; // nesting information (input/output) for each page int num_nesting_levels; diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index 65962aee473..4d128ff910d 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -119,7 +119,7 @@ template <> __device__ size_t row_size_functor::operator()(size_t num_rows, bool nullable) { // only returns the size of offsets and validity. the size of the actual string chars - // is tracked seperately. + // is tracked separately. auto const offset_size = sizeof(offset_type); return (offset_size * (num_rows + 1)) + validity_size(num_rows, nullable); } @@ -366,7 +366,7 @@ struct get_page_nesting_size { if (page.src_col_schema != src_col_schema || page.flags & gpu::PAGEINFO_FLAGS_DICTIONARY) { return 0; } - return page.nesting[depth].size; + return page.nesting[depth].batch_size; } }; @@ -596,11 +596,10 @@ void reader::impl::allocate_columns(hostdevice_vector& chu bool uses_custom_row_bounds) { // computes: - // PageNestingInfo::size for each level of nesting, for each page, taking row bounds into account. - // PageInfo::skipped_values, which tells us where to start decoding in the input to respect the - // user bounds. - // It is only necessary to do this second pass if uses_custom_row_bounds is set (if the user has - // specified artifical bounds). + // PageNestingInfo::batch_size for each level of nesting, for each page, taking row bounds into + // account. PageInfo::skipped_values, which tells us where to start decoding in the input to + // respect the user bounds. It is only necessary to do this second pass if uses_custom_row_bounds + // is set (if the user has specified artifical bounds). 
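For reference, uses_custom_row_bounds is only set when the caller explicitly restricts the row range. A minimal host-side sketch of such a read (the skip_rows/num_rows builder setters and the file name are assumptions for illustration, not part of this patch):

    #include <cudf/io/parquet.hpp>

    // Read rows [100, 150): explicit bounds are what force this second
    // ComputePageSizes pass so per-page batch sizes respect the trim window.
    auto const opts = cudf::io::parquet_reader_options::builder(
                        cudf::io::source_info{"example.parquet"})
                        .skip_rows(100)
                        .num_rows(50)
                        .build();
    auto const result = cudf::io::read_parquet(opts);
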
if (uses_custom_row_bounds) { gpu::ComputePageSizes(pages, chunks, diff --git a/cpp/tests/io/parquet_chunked_reader_test.cpp b/cpp/tests/io/parquet_chunked_reader_test.cpp index 2f5b0155809..9c2ad89754f 100644 --- a/cpp/tests/io/parquet_chunked_reader_test.cpp +++ b/cpp/tests/io/parquet_chunked_reader_test.cpp @@ -177,7 +177,7 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadBoundaryCases) auto [result, num_chunks] = chunked_read(filepath, chunk_read_limit); return std::tuple{std::move(input_table), std::move(result), num_chunks}; }; - + // test with a limit slightly less than one page of data { auto [expected, result, num_chunks] = do_test(79'000); @@ -199,7 +199,7 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadBoundaryCases) CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } - // test with a limit slightly less than two pages of data + // test with a limit slightly less than two pages of data { auto [expected, result, num_chunks] = do_test(159'000); EXPECT_EQ(num_chunks, 2); @@ -218,7 +218,7 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadBoundaryCases) auto [expected, result, num_chunks] = do_test(161'000); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); - } + } } TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithString) From 0c2178d9d5f0dc4253c82b97771a4c505c537b27 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 27 Oct 2022 12:42:56 -0700 Subject: [PATCH 110/162] Adding doxygen, refactoring and cleaning up Signed-off-by: Nghia Truong --- cpp/include/cudf/io/detail/parquet.hpp | 2 +- cpp/src/io/parquet/parquet_gpu.hpp | 1 - cpp/src/io/parquet/reader_impl.cu | 710 +++--------- cpp/src/io/parquet/reader_impl.hpp | 121 +- cpp/src/io/parquet/reader_impl_helpers.cu | 31 +- cpp/src/io/parquet/reader_impl_helpers.cuh | 37 +- cpp/src/io/parquet/reader_impl_preprocess.cu | 1094 ++++++++++++------ 7 files changed, 992 insertions(+), 1004 deletions(-) diff --git a/cpp/include/cudf/io/detail/parquet.hpp b/cpp/include/cudf/io/detail/parquet.hpp index 14fe5847e1f..0697f8282f4 100644 --- a/cpp/include/cudf/io/detail/parquet.hpp +++ b/cpp/include/cudf/io/detail/parquet.hpp @@ -92,7 +92,7 @@ class chunked_reader : reader { /** * @brief Constructor from a read limit and an array of data sources with reader options. * - * @param chunk_read_limit The byte size limit to read each chunk + * @param chunk_read_limit The size limit (in bytes) to read each chunk * @param sources Input `datasource` objects to read the dataset from * @param options Settings for controlling reading behavior * @param stream CUDA stream used for device memory operations and kernel launches. diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index ab4f873da78..509f9135283 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -257,7 +257,6 @@ struct file_intermediate_data { hostdevice_vector chunks{}; hostdevice_vector pages_info{}; hostdevice_vector page_nesting_info{}; - bool has_data{false}; }; // TODO: rename? 
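The public interface these internals serve is the chunked reader (cudf::io::chunked_parquet_reader, whose has_next/read_chunk docs are referenced below). A usage sketch; the 10 MB limit and the file path are illustrative and construction details may differ:

    #include <cudf/io/parquet.hpp>

    auto const opts = cudf::io::parquet_reader_options::builder(
                        cudf::io::source_info{"example.parquet"})
                        .build();

    // Each read_chunk() returns a table whose materialized size stays near the limit.
    cudf::io::chunked_parquet_reader reader(10 * 1024 * 1024, opts);
    while (reader.has_next()) {
      auto chunk = reader.read_chunk();
      // consume chunk.tbl ...
    }
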
diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 434ae3391bc..744f7dcc26b 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -15,10 +15,6 @@ */ #include "reader_impl.hpp" -#include "reader_impl_helpers.cuh" - -#include -#include #include @@ -27,23 +23,73 @@ #include #include -#include -#include +#include +#include +#include +#include namespace cudf::io::detail::parquet { namespace { -void decompress_check(device_span results, rmm::cuda_stream_view stream) -{ - CUDF_EXPECTS(thrust::all_of(rmm::exec_policy(stream), - results.begin(), - results.end(), - [] __device__(auto const& res) { - return res.status == compression_status::SUCCESS; - }), - "Error during decompression"); -} +struct get_page_nesting_size { + size_type const src_col_schema; + size_type const depth; + gpu::PageInfo const* const pages; + + __device__ size_type operator()(int index) const + { + auto const& page = pages[index]; + if (page.src_col_schema != src_col_schema || page.flags & gpu::PAGEINFO_FLAGS_DICTIONARY) { + return 0; + } + return page.nesting[depth].size; + } +}; + +struct start_offset_output_iterator { + gpu::PageInfo* pages; + int const* page_indices; + int cur_index; + int src_col_schema; + int nesting_depth; + int empty = 0; + using value_type = size_type; + using difference_type = size_type; + using pointer = size_type*; + using reference = size_type&; + using iterator_category = thrust::output_device_iterator_tag; + + __host__ __device__ void operator=(start_offset_output_iterator const& other) + { + pages = other.pages; + page_indices = other.page_indices; + cur_index = other.cur_index; + src_col_schema = other.src_col_schema; + nesting_depth = other.nesting_depth; + } + + __host__ __device__ start_offset_output_iterator operator+(int i) + { + return start_offset_output_iterator{ + pages, page_indices, cur_index + i, src_col_schema, nesting_depth}; + } + + __host__ __device__ void operator++() { cur_index++; } + + __device__ reference operator[](int i) { return dereference(cur_index + i); } + __device__ reference operator*() { return dereference(cur_index); } + + private: + __device__ reference dereference(int index) + { + gpu::PageInfo const& p = pages[page_indices[index]]; + if (p.src_col_schema != src_col_schema || p.flags & gpu::PAGEINFO_FLAGS_DICTIONARY) { + return empty; + } + return p.nesting[nesting_depth].page_start_value; + } +}; /** * @brief Recursively copy the output buffer from one to another. @@ -65,507 +111,110 @@ void copy_output_buffer(column_buffer const& buff, column_buffer& new_buff) } // namespace -/** - * @brief Generate depth remappings for repetition and definition levels. - * - * When dealing with columns that contain lists, we must examine incoming - * repetition and definition level pairs to determine what range of output nesting - * is indicated when adding new values. 
This function generates the mappings of - * the R/D levels to those start/end bounds - * - * @param remap Maps column schema index to the R/D remapping vectors for that column - * @param src_col_schema The column schema to generate the new mapping for - * @param md File metadata information - */ -void generate_depth_remappings(std::map, std::vector>>& remap, - int src_col_schema, - aggregate_reader_metadata const& md) +void reader::impl::allocate_columns(hostdevice_vector& chunks, + hostdevice_vector& pages, + gpu::chunk_intermediate_data const& id, + size_t min_row, + size_t num_rows, + bool uses_custom_row_bounds) { - // already generated for this level - if (remap.find(src_col_schema) != remap.end()) { return; } - auto schema = md.get_schema(src_col_schema); - int max_depth = md.get_output_nesting_depth(src_col_schema); - - CUDF_EXPECTS(remap.find(src_col_schema) == remap.end(), - "Attempting to remap a schema more than once"); - auto inserted = - remap.insert(std::pair, std::vector>>{src_col_schema, {}}); - auto& depth_remap = inserted.first->second; - - std::vector& rep_depth_remap = (depth_remap.first); - rep_depth_remap.resize(schema.max_repetition_level + 1); - std::vector& def_depth_remap = (depth_remap.second); - def_depth_remap.resize(schema.max_definition_level + 1); - - // the key: - // for incoming level values R/D - // add values starting at the shallowest nesting level X has repetition level R - // until you reach the deepest nesting level Y that corresponds to the repetition level R1 - // held by the nesting level that has definition level D - // - // Example: a 3 level struct with a list at the bottom - // - // R / D Depth - // level0 0 / 1 0 - // level1 0 / 2 1 - // level2 0 / 3 2 - // list 0 / 3 3 - // element 1 / 4 4 - // - // incoming R/D : 0, 0 -> add values from depth 0 to 3 (def level 0 always maps to depth 0) - // incoming R/D : 0, 1 -> add values from depth 0 to 3 - // incoming R/D : 0, 2 -> add values from depth 0 to 3 - // incoming R/D : 1, 4 -> add values from depth 4 to 4 - // - // Note : the -validity- of values is simply checked by comparing the incoming D value against the - // D value of the given nesting level (incoming D >= the D for the nesting level == valid, - // otherwise NULL). The tricky part is determining what nesting levels to add values at. - // - // For schemas with no repetition level (no lists), X is always 0 and Y is always max nesting - // depth. - // - - // compute "X" from above - for (int s_idx = schema.max_repetition_level; s_idx >= 0; s_idx--) { - auto find_shallowest = [&](int r) { - int shallowest = -1; - int cur_depth = max_depth - 1; - int schema_idx = src_col_schema; - while (schema_idx > 0) { - auto cur_schema = md.get_schema(schema_idx); - if (cur_schema.max_repetition_level == r) { - // if this is a repeated field, map it one level deeper - shallowest = cur_schema.is_stub() ? cur_depth + 1 : cur_depth; - } - // if it's one-level encoding list - else if (cur_schema.is_one_level_list()) { - shallowest = cur_depth - 1; - } - if (!cur_schema.is_stub()) { cur_depth--; } - schema_idx = cur_schema.parent_idx; - } - return shallowest; - }; - rep_depth_remap[s_idx] = find_shallowest(s_idx); + // computes: + // PageNestingInfo::size for each level of nesting, for each page, taking row bounds into account. + // PageInfo::skipped_values, which tells us where to start decoding in the input to respect the + // user bounds. 
+ // It is only necessary to do this second pass if uses_custom_row_bounds is set (if the user has + // specified artifical bounds). + if (uses_custom_row_bounds) { + gpu::ComputePageSizes(pages, + chunks, + min_row, + num_rows, + false, // num_rows is already computed + false, // no need to compute string sizes + _stream); + // print_pages(pages, _stream); } - // compute "Y" from above - for (int s_idx = schema.max_definition_level; s_idx >= 0; s_idx--) { - auto find_deepest = [&](int d) { - SchemaElement prev_schema; - int schema_idx = src_col_schema; - int r1 = 0; - while (schema_idx > 0) { - SchemaElement cur_schema = md.get_schema(schema_idx); - if (cur_schema.max_definition_level == d) { - // if this is a repeated field, map it one level deeper - r1 = cur_schema.is_stub() ? prev_schema.max_repetition_level - : cur_schema.max_repetition_level; - break; - } - prev_schema = cur_schema; - schema_idx = cur_schema.parent_idx; - } + // iterate over all input columns and allocate any associated output + // buffers if they are not part of a list hierarchy. mark down + // if we have any list columns that need further processing. + bool has_lists = false; + for (size_t idx = 0; idx < _input_columns.size(); idx++) { + auto const& input_col = _input_columns[idx]; + size_t const max_depth = input_col.nesting_depth(); - // we now know R1 from above. return the deepest nesting level that has the - // same repetition level - schema_idx = src_col_schema; - int depth = max_depth - 1; - while (schema_idx > 0) { - SchemaElement cur_schema = md.get_schema(schema_idx); - if (cur_schema.max_repetition_level == r1) { - // if this is a repeated field, map it one level deeper - depth = cur_schema.is_stub() ? depth + 1 : depth; - break; - } - if (!cur_schema.is_stub()) { depth--; } - prev_schema = cur_schema; - schema_idx = cur_schema.parent_idx; - } - return depth; - }; - def_depth_remap[s_idx] = find_deepest(s_idx); - } -} + auto* cols = &_output_buffers; + for (size_t l_idx = 0; l_idx < max_depth; l_idx++) { + auto& out_buf = (*cols)[input_col.nesting[l_idx]]; + cols = &out_buf.children; -std::future reader::impl::read_column_chunks( - std::vector>& page_data, - hostdevice_vector& chunks, // TODO const? - size_t begin_chunk, - size_t end_chunk, - const std::vector& column_chunk_offsets, - std::vector const& chunk_source_map) -{ - // Transfer chunk data, coalescing adjacent chunks - std::vector> read_tasks; - for (size_t chunk = begin_chunk; chunk < end_chunk;) { - const size_t io_offset = column_chunk_offsets[chunk]; - size_t io_size = chunks[chunk].compressed_size; - size_t next_chunk = chunk + 1; - const bool is_compressed = (chunks[chunk].codec != parquet::Compression::UNCOMPRESSED); - while (next_chunk < end_chunk) { - const size_t next_offset = column_chunk_offsets[next_chunk]; - const bool is_next_compressed = - (chunks[next_chunk].codec != parquet::Compression::UNCOMPRESSED); - if (next_offset != io_offset + io_size || is_next_compressed != is_compressed) { - // Can't merge if not contiguous or mixing compressed and uncompressed - // Not coalescing uncompressed with compressed chunks is so that compressed buffers can be - // freed earlier (immediately after decompression stage) to limit peak memory requirements - break; + // if this has a list parent, we will have to do further work in gpu::PreprocessColumnData + // to know how big this buffer actually is. 
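The num_rows + 1 in the allocation below reflects the offsets representation of list columns: N rows are described by N + 1 offsets. A tiny illustration with made-up values:

    #include <cstdint>
    #include <vector>

    // LIST<INT32> column holding {1, 2}, {}, {3}: 3 rows, 4 offsets.
    std::vector<int32_t> offsets{0, 2, 2, 3};  // size() == num_rows + 1
    std::vector<int32_t> child{1, 2, 3};       // leaf values
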
+ if (out_buf.user_data & PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT) { + has_lists = true; } - io_size += chunks[next_chunk].compressed_size; - next_chunk++; - } - if (io_size != 0) { - auto& source = _sources[chunk_source_map[chunk]]; - if (source->is_device_read_preferred(io_size)) { - auto buffer = rmm::device_buffer(io_size, _stream); - auto fut_read_size = source->device_read_async( - io_offset, io_size, static_cast(buffer.data()), _stream); - read_tasks.emplace_back(std::move(fut_read_size)); - page_data[chunk] = datasource::buffer::create(std::move(buffer)); - } else { - auto const buffer = source->host_read(io_offset, io_size); - page_data[chunk] = - datasource::buffer::create(rmm::device_buffer(buffer->data(), buffer->size(), _stream)); + // if we haven't already processed this column because it is part of a struct hierarchy + else if (out_buf.size == 0) { + // add 1 for the offset if this is a list column + out_buf.create( + out_buf.type.id() == type_id::LIST && l_idx < max_depth ? num_rows + 1 : num_rows, + _stream, + _mr); } - auto d_compdata = page_data[chunk]->data(); - do { - chunks[chunk].compressed_data = d_compdata; - d_compdata += chunks[chunk].compressed_size; - } while (++chunk != next_chunk); - } else { - chunk = next_chunk; } } - auto sync_fn = [](decltype(read_tasks) read_tasks) { - for (auto& task : read_tasks) { - task.wait(); - } - }; - return std::async(std::launch::deferred, sync_fn, std::move(read_tasks)); -} -size_t reader::impl::count_page_headers(hostdevice_vector& chunks) -{ - size_t total_pages = 0; - - chunks.host_to_device(_stream); - gpu::DecodePageHeaders(chunks.device_ptr(), chunks.size(), _stream); - chunks.device_to_host(_stream, true); - - for (size_t c = 0; c < chunks.size(); c++) { - total_pages += chunks[c].num_data_pages + chunks[c].num_dict_pages; - } - - return total_pages; -} - -void reader::impl::decode_page_headers(hostdevice_vector& chunks, - hostdevice_vector& pages) -{ - // IMPORTANT : if you change how pages are stored within a chunk (dist pages, then data pages), - // please update preprocess_nested_columns to reflect this. - for (size_t c = 0, page_count = 0; c < chunks.size(); c++) { - chunks[c].max_num_pages = chunks[c].num_data_pages + chunks[c].num_dict_pages; - chunks[c].page_info = pages.device_ptr(page_count); - page_count += chunks[c].max_num_pages; - } - - chunks.host_to_device(_stream); - gpu::DecodePageHeaders(chunks.device_ptr(), chunks.size(), _stream); - pages.device_to_host(_stream, true); -} - -rmm::device_buffer reader::impl::decompress_page_data( - hostdevice_vector& chunks, hostdevice_vector& pages) -{ - auto for_each_codec_page = [&](parquet::Compression codec, const std::function& f) { - for (size_t c = 0, page_count = 0; c < chunks.size(); c++) { - const auto page_stride = chunks[c].max_num_pages; - if (chunks[c].codec == codec) { - for (int k = 0; k < page_stride; k++) { - f(page_count + k); + // compute output column sizes by examining the pages of the -input- columns + if (has_lists) { + auto& page_keys = _chunk_itm_data.page_keys; + auto& page_index = _chunk_itm_data.page_index; + for (size_t idx = 0; idx < _input_columns.size(); idx++) { + auto const& input_col = _input_columns[idx]; + auto src_col_schema = input_col.schema_idx; + size_t max_depth = input_col.nesting_depth(); + + auto* cols = &_output_buffers; + for (size_t l_idx = 0; l_idx < input_col.nesting_depth(); l_idx++) { + auto& out_buf = (*cols)[input_col.nesting[l_idx]]; + cols = &out_buf.children; + + // size iterator. 
indexes pages by sorted order + auto size_input = thrust::make_transform_iterator( + page_index.begin(), + get_page_nesting_size{src_col_schema, static_cast(l_idx), pages.device_ptr()}); + + // if this buffer is part of a list hierarchy, we need to determine it's + // final size and allocate it here. + // + // for struct columns, higher levels of the output columns are shared between input + // columns. so don't compute any given level more than once. + if ((out_buf.user_data & PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT) && out_buf.size == 0) { + int size = + thrust::reduce(rmm::exec_policy(_stream), size_input, size_input + pages.size()); + + // if this is a list column add 1 for non-leaf levels for the terminating offset + if (out_buf.type.id() == type_id::LIST && l_idx < max_depth) { size++; } + + // allocate + out_buf.create(size, _stream, _mr); } - } - page_count += page_stride; - } - }; - - // Brotli scratch memory for decompressing - rmm::device_buffer debrotli_scratch; - - // Count the exact number of compressed pages - size_t num_comp_pages = 0; - size_t total_decomp_size = 0; - - struct codec_stats { - parquet::Compression compression_type = UNCOMPRESSED; - size_t num_pages = 0; - int32_t max_decompressed_size = 0; - size_t total_decomp_size = 0; - }; - - std::array codecs{codec_stats{parquet::GZIP}, - codec_stats{parquet::SNAPPY}, - codec_stats{parquet::BROTLI}, - codec_stats{parquet::ZSTD}}; - - auto is_codec_supported = [&codecs](int8_t codec) { - if (codec == parquet::UNCOMPRESSED) return true; - return std::find_if(codecs.begin(), codecs.end(), [codec](auto& cstats) { - return codec == cstats.compression_type; - }) != codecs.end(); - }; - CUDF_EXPECTS(std::all_of(chunks.begin(), - chunks.end(), - [&is_codec_supported](auto const& chunk) { - return is_codec_supported(chunk.codec); - }), - "Unsupported compression type"); - - for (auto& codec : codecs) { - for_each_codec_page(codec.compression_type, [&](size_t page) { - auto page_uncomp_size = pages[page].uncompressed_page_size; - total_decomp_size += page_uncomp_size; - codec.total_decomp_size += page_uncomp_size; - codec.max_decompressed_size = std::max(codec.max_decompressed_size, page_uncomp_size); - codec.num_pages++; - num_comp_pages++; - }); - if (codec.compression_type == parquet::BROTLI && codec.num_pages > 0) { - debrotli_scratch.resize(get_gpu_debrotli_scratch_size(codec.num_pages), _stream); - } - } - - // Dispatch batches of pages to decompress for each codec - rmm::device_buffer decomp_pages(total_decomp_size, _stream); - - std::vector> comp_in; - comp_in.reserve(num_comp_pages); - std::vector> comp_out; - comp_out.reserve(num_comp_pages); - - // vectors to save v2 def and rep level data, if any - std::vector> copy_in; - copy_in.reserve(num_comp_pages); - std::vector> copy_out; - copy_out.reserve(num_comp_pages); - - rmm::device_uvector comp_res(num_comp_pages, _stream); - thrust::fill(rmm::exec_policy(_stream), - comp_res.begin(), - comp_res.end(), - compression_result{0, compression_status::FAILURE}); - - size_t decomp_offset = 0; - int32_t start_pos = 0; - for (const auto& codec : codecs) { - if (codec.num_pages == 0) { continue; } - - for_each_codec_page(codec.compression_type, [&](size_t page_idx) { - auto const dst_base = static_cast(decomp_pages.data()) + decomp_offset; - auto& page = pages[page_idx]; - // offset will only be non-zero for V2 pages - auto const offset = page.def_lvl_bytes + page.rep_lvl_bytes; - // for V2 need to copy def and rep level info into place, and then offset the - // input and 
output buffers. otherwise we'd have to keep both the compressed - // and decompressed data. - if (offset != 0) { - copy_in.emplace_back(page.page_data, offset); - copy_out.emplace_back(dst_base, offset); - } - comp_in.emplace_back(page.page_data + offset, - static_cast(page.compressed_page_size - offset)); - comp_out.emplace_back(dst_base + offset, - static_cast(page.uncompressed_page_size - offset)); - page.page_data = dst_base; - decomp_offset += page.uncompressed_page_size; - }); - - host_span const> comp_in_view{comp_in.data() + start_pos, - codec.num_pages}; - auto const d_comp_in = cudf::detail::make_device_uvector_async(comp_in_view, _stream); - host_span const> comp_out_view(comp_out.data() + start_pos, - codec.num_pages); - auto const d_comp_out = cudf::detail::make_device_uvector_async(comp_out_view, _stream); - device_span d_comp_res_view(comp_res.data() + start_pos, codec.num_pages); - - switch (codec.compression_type) { - case parquet::GZIP: - gpuinflate(d_comp_in, d_comp_out, d_comp_res_view, gzip_header_included::YES, _stream); - break; - case parquet::SNAPPY: - if (nvcomp_integration::is_stable_enabled()) { - nvcomp::batched_decompress(nvcomp::compression_type::SNAPPY, - d_comp_in, - d_comp_out, - d_comp_res_view, - codec.max_decompressed_size, - codec.total_decomp_size, - _stream); - } else { - gpu_unsnap(d_comp_in, d_comp_out, d_comp_res_view, _stream); - } - break; - case parquet::ZSTD: - nvcomp::batched_decompress(nvcomp::compression_type::ZSTD, - d_comp_in, - d_comp_out, - d_comp_res_view, - codec.max_decompressed_size, - codec.total_decomp_size, - _stream); - break; - case parquet::BROTLI: - gpu_debrotli(d_comp_in, - d_comp_out, - d_comp_res_view, - debrotli_scratch.data(), - debrotli_scratch.size(), - _stream); - break; - default: CUDF_FAIL("Unexpected decompression dispatch"); break; - } - start_pos += codec.num_pages; - } - - decompress_check(comp_res, _stream); - // now copy the uncompressed V2 def and rep level data - if (not copy_in.empty()) { - auto const d_copy_in = cudf::detail::make_device_uvector_async(copy_in, _stream); - auto const d_copy_out = cudf::detail::make_device_uvector_async(copy_out, _stream); - - gpu_copy_uncompressed_blocks(d_copy_in, d_copy_out, _stream); - _stream.synchronize(); - } - - // Update the page information in device memory with the updated value of - // page_data; it now points to the uncompressed data buffer - pages.host_to_device(_stream); - - return decomp_pages; -} - -void reader::impl::allocate_nesting_info(hostdevice_vector const& chunks, - hostdevice_vector& pages, - hostdevice_vector& page_nesting_info) -{ - // compute total # of page_nesting infos needed and allocate space. 
doing this in one - // buffer to keep it to a single gpu allocation - size_t const total_page_nesting_infos = std::accumulate( - chunks.host_ptr(), chunks.host_ptr() + chunks.size(), 0, [&](int total, auto& chunk) { - // the schema of the input column - auto const& schema = _metadata->get_schema(chunk.src_col_schema); - auto const per_page_nesting_info_size = max( - schema.max_definition_level + 1, _metadata->get_output_nesting_depth(chunk.src_col_schema)); - return total + (per_page_nesting_info_size * chunk.num_data_pages); - }); - - page_nesting_info = hostdevice_vector{total_page_nesting_infos, _stream}; - - // retrieve from the gpu so we can update - pages.device_to_host(_stream, true); - - // update pointers in the PageInfos - int target_page_index = 0; - int src_info_index = 0; - for (size_t idx = 0; idx < chunks.size(); idx++) { - int src_col_schema = chunks[idx].src_col_schema; - auto& schema = _metadata->get_schema(src_col_schema); - auto const per_page_nesting_info_size = std::max( - schema.max_definition_level + 1, _metadata->get_output_nesting_depth(src_col_schema)); - auto const type_id = to_type_id(schema, _strings_to_categorical, _timestamp_type.id()); - - // skip my dict pages - target_page_index += chunks[idx].num_dict_pages; - for (int p_idx = 0; p_idx < chunks[idx].num_data_pages; p_idx++) { - pages[target_page_index + p_idx].nesting = page_nesting_info.device_ptr() + src_info_index; - pages[target_page_index + p_idx].num_nesting_levels = per_page_nesting_info_size; - - // this isn't the ideal place to be setting this value (it's not obvious this function would - // do it) but we don't have any other places that go host->device with the pages and I'd like - // to avoid another copy - pages[target_page_index + p_idx].type = type_id; - - src_info_index += per_page_nesting_info_size; - } - target_page_index += chunks[idx].num_data_pages; - } - - // copy back to the gpu - pages.host_to_device(_stream); - - // fill in - int nesting_info_index = 0; - std::map, std::vector>> depth_remapping; - for (size_t idx = 0; idx < chunks.size(); idx++) { - int src_col_schema = chunks[idx].src_col_schema; - - // schema of the input column - auto& schema = _metadata->get_schema(src_col_schema); - // real depth of the output cudf column hierarchy (1 == no nesting, 2 == 1 level, etc) - int max_depth = _metadata->get_output_nesting_depth(src_col_schema); - - // # of nesting infos stored per page for this column - auto const per_page_nesting_info_size = std::max(schema.max_definition_level + 1, max_depth); - - // if this column has lists, generate depth remapping - std::map, std::vector>> depth_remapping; - if (schema.max_repetition_level > 0) { - generate_depth_remappings(depth_remapping, src_col_schema, *_metadata); - } - - // fill in host-side nesting info - int schema_idx = src_col_schema; - auto cur_schema = _metadata->get_schema(schema_idx); - int cur_depth = max_depth - 1; - while (schema_idx > 0) { - // stub columns (basically the inner field of a list scheme element) are not real columns. 
- // we can ignore them for the purposes of output nesting info - if (!cur_schema.is_stub()) { - // initialize each page within the chunk - for (int p_idx = 0; p_idx < chunks[idx].num_data_pages; p_idx++) { - gpu::PageNestingInfo* pni = - &page_nesting_info[nesting_info_index + (p_idx * per_page_nesting_info_size)]; - - // if we have lists, set our start and end depth remappings - if (schema.max_repetition_level > 0) { - auto remap = depth_remapping.find(src_col_schema); - CUDF_EXPECTS(remap != depth_remapping.end(), - "Could not find depth remapping for schema"); - std::vector const& rep_depth_remap = (remap->second.first); - std::vector const& def_depth_remap = (remap->second.second); - - for (size_t m = 0; m < rep_depth_remap.size(); m++) { - pni[m].start_depth = rep_depth_remap[m]; - } - for (size_t m = 0; m < def_depth_remap.size(); m++) { - pni[m].end_depth = def_depth_remap[m]; - } - } - - // values indexed by output column index - pni[cur_depth].max_def_level = cur_schema.max_definition_level; - pni[cur_depth].max_rep_level = cur_schema.max_repetition_level; - pni[cur_depth].size = 0; + // for nested hierarchies, compute per-page start offset + if (input_col.has_repetition) { + thrust::exclusive_scan_by_key( + rmm::exec_policy(_stream), + page_keys.begin(), + page_keys.end(), + size_input, + start_offset_output_iterator{pages.device_ptr(), + page_index.begin(), + 0, + static_cast(src_col_schema), + static_cast(l_idx)}); } - - // move up the hierarchy - cur_depth--; } - - // next schema - schema_idx = cur_schema.parent_idx; - cur_schema = _metadata->get_schema(schema_idx); } - - nesting_info_index += (per_page_nesting_info_size * chunks[idx].num_data_pages); } - - // copy nesting info to the device - page_nesting_info.host_to_device(_stream); } void reader::impl::decode_page_data(hostdevice_vector& chunks, @@ -642,7 +291,7 @@ void reader::impl::decode_page_data(hostdevice_vector& chu // // we do this by only handing out the pointers to the first child we come across. 
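As a concrete case of the sharing described above: for a struct s with leaf children a and b, the input columns s.a and s.b both pass through the same depth-0 output buffer, so only the first visit claims it. A stripped-down sketch of that claim-once rule (names and layout are illustrative, not the reader's actual structures):

    #include <array>
    #include <cstdio>

    struct Buf { bool claimed = false; };

    int main() {
      std::array<Buf, 3> out{};          // [s, a, b]
      int const parent_of[2] = {0, 0};   // both leaves share parent buffer 0 ("s")
      for (int leaf = 0; leaf < 2; ++leaf) {
        Buf& p = out[parent_of[leaf]];
        if (!p.claimed) {                // only the first child hands out s's pointers
          p.claimed = true;
          std::printf("leaf %d claims the shared parent\n", leaf);
        }
      }
      return 0;
    }
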
// - auto* cols = &_output_columns; + auto* cols = &_output_buffers; for (size_t idx = 0; idx < max_depth; idx++) { auto& out_buf = (*cols)[input_col.nesting[idx]]; cols = &out_buf.children; @@ -685,7 +334,7 @@ void reader::impl::decode_page_data(hostdevice_vector& chu for (size_t idx = 0; idx < _input_columns.size(); idx++) { input_column_info const& input_col = _input_columns[idx]; - auto* cols = &_output_columns; + auto* cols = &_output_buffers; for (size_t l_idx = 0; l_idx < input_col.nesting_depth(); l_idx++) { auto& out_buf = (*cols)[input_col.nesting[l_idx]]; cols = &out_buf.children; @@ -718,7 +367,7 @@ void reader::impl::decode_page_data(hostdevice_vector& chu int index = pi->nesting - page_nesting.device_ptr(); gpu::PageNestingInfo* pni = &page_nesting[index]; - auto* cols = &_output_columns; + auto* cols = &_output_buffers; for (size_t l_idx = 0; l_idx < input_col.nesting_depth(); l_idx++) { auto& out_buf = (*cols)[input_col.nesting[l_idx]]; cols = &out_buf.children; @@ -755,7 +404,7 @@ reader::impl::impl(std::vector>&& sources, _reader_column_schema = options.get_column_schema(); // Select only columns required by the options - std::tie(_input_columns, _output_columns, _output_column_schemas) = + std::tie(_input_columns, _output_buffers, _output_column_schemas) = _metadata->select_columns(options.get_columns(), options.is_enabled_use_pandas_metadata(), _strings_to_categorical, @@ -775,25 +424,26 @@ reader::impl::impl(std::size_t chunk_read_limit, _chunk_read_limit = chunk_read_limit; // Save the states of the output buffers for reuse in `chunk_read()`. - for (auto const& buff : _output_columns) { + for (auto const& buff : _output_buffers) { auto& new_buff = - _output_columns_template.emplace_back(column_buffer(buff.type, buff.is_nullable)); + _output_buffers_template.emplace_back(column_buffer(buff.type, buff.is_nullable)); copy_output_buffer(buff, new_buff); } } -void reader::impl::preprocess_file_and_columns( - size_type skip_rows, - size_type num_rows, - bool uses_custom_row_bounds, - std::vector> const& row_group_list) +void reader::impl::prepare_data(size_type skip_rows, + size_type num_rows, + bool uses_custom_row_bounds, + std::vector> const& row_group_list) { if (_file_preprocessed) { return; } - auto [skip_rows_corrected, num_rows_corrected] = - preprocess_file(skip_rows, num_rows, row_group_list); + const auto [skip_rows_corrected, num_rows_corrected, row_groups_info] = + _metadata->select_row_groups(row_group_list, skip_rows, num_rows); + + if (num_rows_corrected > 0 && row_groups_info.size() != 0 && _input_columns.size() != 0) { + load_and_decompress_data(row_groups_info, num_rows_corrected); - if (_file_itm_data.has_data) { preprocess_columns(_file_itm_data.chunks, _file_itm_data.pages_info, skip_rows_corrected, @@ -817,7 +467,7 @@ table_with_metadata reader::impl::read_chunk_internal(bool uses_custom_row_bound // output cudf columns as determined by the top level schema auto out_columns = std::vector>{}; - out_columns.reserve(_output_columns.size()); + out_columns.reserve(_output_buffers.size()); if (!has_next()) { return finalize_output(out_metadata, out_columns); } @@ -842,16 +492,16 @@ table_with_metadata reader::impl::read_chunk_internal(bool uses_custom_row_bound read_info.num_rows); // create the final output cudf columns - for (size_t i = 0; i < _output_columns.size(); ++i) { + for (size_t i = 0; i < _output_buffers.size(); ++i) { auto const metadata = _reader_column_schema.has_value() ? 
std::make_optional((*_reader_column_schema)[i]) : std::nullopt; // Only construct `out_metadata` if `_output_metadata` has not been cached. if (!_output_metadata) { column_name_info& col_name = out_metadata.schema_info.emplace_back(""); - out_columns.emplace_back(make_column(_output_columns[i], &col_name, metadata, _stream, _mr)); + out_columns.emplace_back(make_column(_output_buffers[i], &col_name, metadata, _stream, _mr)); } else { - out_columns.emplace_back(make_column(_output_columns[i], nullptr, metadata, _stream, _mr)); + out_columns.emplace_back(make_column(_output_buffers[i], nullptr, metadata, _stream, _mr)); } } @@ -862,18 +512,18 @@ table_with_metadata reader::impl::finalize_output(table_metadata& out_metadata, std::vector>& out_columns) { // Create empty columns as needed (this can happen if we've ended up with no actual data to read) - for (size_t i = out_columns.size(); i < _output_columns.size(); ++i) { + for (size_t i = out_columns.size(); i < _output_buffers.size(); ++i) { if (!_output_metadata) { column_name_info& col_name = out_metadata.schema_info.emplace_back(""); - out_columns.emplace_back(io::detail::empty_like(_output_columns[i], &col_name, _stream, _mr)); + out_columns.emplace_back(io::detail::empty_like(_output_buffers[i], &col_name, _stream, _mr)); } else { - out_columns.emplace_back(io::detail::empty_like(_output_columns[i], nullptr, _stream, _mr)); + out_columns.emplace_back(io::detail::empty_like(_output_buffers[i], nullptr, _stream, _mr)); } } if (!_output_metadata) { // Return column names (must match order of returned columns) - out_metadata.column_names.resize(_output_columns.size()); + out_metadata.column_names.resize(_output_buffers.size()); for (size_t i = 0; i < _output_column_schemas.size(); i++) { auto const& schema = _metadata->get_schema(_output_column_schemas[i]); out_metadata.column_names[i] = schema.name; @@ -898,12 +548,12 @@ table_with_metadata reader::impl::read(size_type skip_rows, std::vector> const& row_group_list) { #if defined(ALLOW_PLAIN_READ_CHUNK_LIMIT) - preprocess_file_and_columns( + prepare_data( skip_rows, num_rows, uses_custom_row_bounds || _chunk_read_limit > 0, row_group_list); return read_chunk_internal(uses_custom_row_bounds || _chunk_read_limit > 0); #else CUDF_EXPECTS(_chunk_read_limit == 0, "Reading the whole file must not have non-zero byte_limit."); - preprocess_file_and_columns(skip_rows, num_rows, uses_custom_row_bounds, row_group_list); + prepare_data(skip_rows, num_rows, uses_custom_row_bounds, row_group_list); return read_chunk_internal(uses_custom_row_bounds); #endif } @@ -911,19 +561,19 @@ table_with_metadata reader::impl::read(size_type skip_rows, table_with_metadata reader::impl::read_chunk() { // Reset the output buffers to their original states (right after reader construction). 
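The reset below is needed because each pass hands the working buffers' device data off to the returned columns, so the buffers must be rebuilt from the pristine copies captured at construction before the next chunk is decoded. A stripped-down sketch of the pattern (the buffer type here is illustrative):

    #include <vector>

    struct buffer { int type; bool nullable; /* device allocations omitted */ };

    std::vector<buffer> buffers_template;  // saved once, right after construction
    std::vector<buffer> working_buffers;   // consumed by every chunk read

    void reset_working_buffers() {
      working_buffers.clear();
      for (auto const& b : buffers_template) {
        working_buffers.push_back(buffer{b.type, b.nullable});  // fresh, empty copies
      }
    }
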
- _output_columns.resize(0); - for (auto const& buff : _output_columns_template) { - auto& new_buff = _output_columns.emplace_back(column_buffer(buff.type, buff.is_nullable)); + _output_buffers.resize(0); + for (auto const& buff : _output_buffers_template) { + auto& new_buff = _output_buffers.emplace_back(column_buffer(buff.type, buff.is_nullable)); copy_output_buffer(buff, new_buff); } - preprocess_file_and_columns(0, -1, true, {}); + prepare_data(0, -1, true, {}); return read_chunk_internal(true); } bool reader::impl::has_next() { - preprocess_file_and_columns(0, -1, true, {}); + prepare_data(0, -1, true, {}); return _current_read_chunk < _chunk_read_info.size(); } diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index c53c72b3200..b936c027ef1 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -23,6 +23,7 @@ #include "parquet.hpp" #include "parquet_gpu.hpp" +#include "reader_impl_helpers.cuh" #include #include @@ -83,7 +84,9 @@ class reader::impl { /** * @brief Constructor from a chunk read limit and an array of dataset sources with reader options. * - * @param chunk_read_limit The byte size limit to read each chunk + * By using this constructor, the reader will supports chunked reading with read size limit. + * + * @param chunk_read_limit The size limit (in bytes) to read each chunk * @param sources Dataset sources * @param options Settings for controlling reading behavior * @param stream CUDA stream used for device memory operations and kernel launches @@ -96,101 +99,57 @@ class reader::impl { rmm::mr::device_memory_resource* mr); /** - * TODO - * - * @brief read_chunk - * @param chunk_info - * @return - */ - table_with_metadata read_chunk(); - - /** - * TODO - * - * @brief read_completed - * @return + * @copydoc cudf::io::chunked_parquet_reader::has_next */ bool has_next(); - private: - // TODO - void preprocess_file_and_columns(size_type skip_rows, - size_type num_rows, - bool uses_custom_row_bounds, - const std::vector>& row_group_list); - - // TODO - table_with_metadata read_chunk_internal(bool uses_custom_row_bounds); - - /** - * TODO - * - * @brief load_column_chunk_descriotions - * @return - */ - std::pair preprocess_file( - size_type skip_rows, - size_type num_rows, - std::vector> const& row_group_list); - /** - * TODO - * - * @brief make_output - * @param out_metadata - * @param out_columns - * @return + * @copydoc cudf::io::chunked_parquet_reader::read_chunk */ - table_with_metadata finalize_output(table_metadata& out_metadata, - std::vector>& out_columns); + table_with_metadata read_chunk(); + private: /** - * TODO: Rename this into something more meaningful - * - * @brief Reads compressed page data to device memory - * - * @param page_data Buffers to hold compressed page data for each chunk - * @param chunks List of column chunk descriptors - * @param begin_chunk Index of first column chunk to read - * @param end_chunk Index after the last column chunk to read - * @param column_chunk_offsets File offset for all chunks + * @brief Perform the necessary data preprocessing for reading data later on. 
* + * @param skip_rows Number of rows to skip from the start + * @param num_rows Number of rows to read + * @param uses_custom_row_bounds Whether or not num_rows and min_rows represents user-specific + * bounds + * @param row_group_indices Lists of row groups to read, one per source */ - std::future read_column_chunks(std::vector>& page_data, - hostdevice_vector& chunks, - size_t begin_chunk, - size_t end_chunk, - const std::vector& column_chunk_offsets, - std::vector const& chunk_source_map); + void prepare_data(size_type skip_rows, + size_type num_rows, + bool uses_custom_row_bounds, + const std::vector>& row_group_list); /** - * @brief Returns the number of total pages from the given column chunks + * @brief Read a chunk of data and return an output table. * - * @param chunks List of column chunk descriptors + * This function is called internally and expects all preprocessing steps have been done. * - * @return The total number of pages + * @param uses_custom_row_bounds Whether or not num_rows and min_rows represents user-specific + * bounds + * @return The output table along with columns' metadata */ - size_t count_page_headers(hostdevice_vector& chunks); + table_with_metadata read_chunk_internal(bool uses_custom_row_bounds); /** - * @brief Returns the page information from the given column chunks. - * - * @param chunks List of column chunk descriptors - * @param pages List of page information + * @brief Load and decompress the input file(s) into memory. */ - void decode_page_headers(hostdevice_vector& chunks, - hostdevice_vector& pages); + void load_and_decompress_data(std::vector const& row_groups_info, + size_type num_rows); /** - * @brief Decompresses the page data, at page granularity. + * @brief Finalize the output table by adding empty columns for the non-selected columns in + * schema. 
* - * @param chunks List of column chunk descriptors - * @param pages List of page information - * - * @return Device buffer to decompressed page data + * @param out_metadata The output table metadata + * @param out_columns The columns for building the output table + * @return */ - rmm::device_buffer decompress_page_data(hostdevice_vector& chunks, - hostdevice_vector& pages); + table_with_metadata finalize_output(table_metadata& out_metadata, + std::vector>& out_columns); /** * @brief Allocate nesting information storage for all pages and set pointers @@ -279,16 +238,16 @@ class reader::impl { // input columns to be processed std::vector _input_columns; - // output columns to be generated - std::vector _output_columns; + // Buffers for generating output columns + std::vector _output_buffers; - // data of output columns saved after construction for reuse - std::vector _output_columns_template; + // Buffers copied from `_output_buffers` after construction for reuse + std::vector _output_buffers_template; - // _output_columns associated schema indices + // _output_buffers associated schema indices std::vector _output_column_schemas; - // _output_columns associated metadata + // _output_buffers associated metadata std::unique_ptr _output_metadata; bool _strings_to_categorical = false; diff --git a/cpp/src/io/parquet/reader_impl_helpers.cu b/cpp/src/io/parquet/reader_impl_helpers.cu index b519e4c8dc0..b8dc10e4559 100644 --- a/cpp/src/io/parquet/reader_impl_helpers.cu +++ b/cpp/src/io/parquet/reader_impl_helpers.cu @@ -326,19 +326,21 @@ std::vector aggregate_reader_metadata::get_pandas_index_names() con return names; } -std::vector aggregate_reader_metadata::select_row_groups( - std::vector> const& row_groups, - size_type& row_start, - size_type& row_count) const +std::tuple> +aggregate_reader_metadata::select_row_groups( + std::vector> const& row_groups_list, + size_type row_start, + size_type row_count) const { - if (!row_groups.empty()) { - std::vector selection; - CUDF_EXPECTS(row_groups.size() == per_file_metadata.size(), + std::vector selection; + + if (!row_groups_list.empty()) { + CUDF_EXPECTS(row_groups_list.size() == per_file_metadata.size(), "Must specify row groups for each source"); row_count = 0; - for (size_t src_idx = 0; src_idx < row_groups.size(); ++src_idx) { - for (auto const& rowgroup_idx : row_groups[src_idx]) { + for (size_t src_idx = 0; src_idx < row_groups_list.size(); ++src_idx) { + for (auto const& rowgroup_idx : row_groups_list[src_idx]) { CUDF_EXPECTS( rowgroup_idx >= 0 && rowgroup_idx < static_cast(per_file_metadata[src_idx].row_groups.size()), @@ -347,19 +349,18 @@ std::vector aggregate_reader_metadata row_count += get_row_group(rowgroup_idx, src_idx).num_rows; } } - return selection; + + return {row_start, row_count, std::move(selection)}; } row_start = std::max(row_start, 0); if (row_count < 0) { - row_count = static_cast( - std::min(get_num_rows(), std::numeric_limits::max())); + row_count = std::min(get_num_rows(), std::numeric_limits::max()); } - row_count = min(row_count, get_num_rows() - row_start); + row_count = std::min(row_count, get_num_rows() - row_start); CUDF_EXPECTS(row_count >= 0, "Invalid row count"); CUDF_EXPECTS(row_start <= get_num_rows(), "Invalid row start"); - std::vector selection; size_type count = 0; for (size_t src_idx = 0; src_idx < per_file_metadata.size(); ++src_idx) { for (size_t rg_idx = 0; rg_idx < per_file_metadata[src_idx].row_groups.size(); ++rg_idx) { @@ -372,7 +373,7 @@ std::vector aggregate_reader_metadata } } - return 
selection; + return {row_start, row_count, std::move(selection)}; } std::tuple, std::vector, std::vector> diff --git a/cpp/src/io/parquet/reader_impl_helpers.cuh b/cpp/src/io/parquet/reader_impl_helpers.cuh index b2682a55249..77f8cfc518e 100644 --- a/cpp/src/io/parquet/reader_impl_helpers.cuh +++ b/cpp/src/io/parquet/reader_impl_helpers.cuh @@ -50,6 +50,19 @@ inline data_type to_data_type(type_id t_id, SchemaElement const& schema) : data_type{t_id}; } +/** + * @brief The row_group_info class + */ +struct row_group_info { + size_type const index; + size_t const start_row; // TODO source index + size_type const source_index; + row_group_info(size_type index, size_t start_row, size_type source_index) + : index(index), start_row(start_row), source_index(source_index) + { + } +}; + /** * @brief Class for parsing dataset metadata */ @@ -145,29 +158,23 @@ class aggregate_reader_metadata { */ [[nodiscard]] std::vector get_pandas_index_names() const; - struct row_group_info { - size_type const index; - size_t const start_row; // TODO source index - size_type const source_index; - row_group_info(size_type index, size_t start_row, size_type source_index) - : index(index), start_row(start_row), source_index(source_index) - { - } - }; - /** * @brief Filters and reduces down to a selection of row groups * + * The input `row_start` and `row_count` parameters will be recomputed and output as the valid + * values based on the input row group list. + * * @param row_groups Lists of row groups to read, one per source * @param row_start Starting row of the selection * @param row_count Total number of rows selected * - * @return List of row group indexes and its starting row + * @return A tuple of corrected row_start, row_count and list of row group indexes and its + * starting row */ - [[nodiscard]] std::vector select_row_groups( - std::vector> const& row_groups, - size_type& row_start, - size_type& row_count) const; + [[nodiscard]] std::tuple> select_row_groups( + std::vector> const& row_groups_list, + size_type row_start, + size_type row_count) const; /** * @brief Filters and reduces down to a selection of columns diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index 65962aee473..7658e7390ed 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -17,6 +17,8 @@ #include "reader_impl.hpp" #include "reader_impl_helpers.cuh" +#include +#include #include #include @@ -27,11 +29,13 @@ #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -41,6 +45,734 @@ namespace cudf::io::detail::parquet { namespace { +/** + * @brief Generate depth remappings for repetition and definition levels. + * + * When dealing with columns that contain lists, we must examine incoming + * repetition and definition level pairs to determine what range of output nesting + * is indicated when adding new values. 
This function generates the mappings of + * the R/D levels to those start/end bounds + * + * @param remap Maps column schema index to the R/D remapping vectors for that column + * @param src_col_schema The column schema to generate the new mapping for + * @param md File metadata information + */ +void generate_depth_remappings(std::map, std::vector>>& remap, + int src_col_schema, + aggregate_reader_metadata const& md) +{ + // already generated for this level + if (remap.find(src_col_schema) != remap.end()) { return; } + auto schema = md.get_schema(src_col_schema); + int max_depth = md.get_output_nesting_depth(src_col_schema); + + CUDF_EXPECTS(remap.find(src_col_schema) == remap.end(), + "Attempting to remap a schema more than once"); + auto inserted = + remap.insert(std::pair, std::vector>>{src_col_schema, {}}); + auto& depth_remap = inserted.first->second; + + std::vector& rep_depth_remap = (depth_remap.first); + rep_depth_remap.resize(schema.max_repetition_level + 1); + std::vector& def_depth_remap = (depth_remap.second); + def_depth_remap.resize(schema.max_definition_level + 1); + + // the key: + // for incoming level values R/D + // add values starting at the shallowest nesting level X has repetition level R + // until you reach the deepest nesting level Y that corresponds to the repetition level R1 + // held by the nesting level that has definition level D + // + // Example: a 3 level struct with a list at the bottom + // + // R / D Depth + // level0 0 / 1 0 + // level1 0 / 2 1 + // level2 0 / 3 2 + // list 0 / 3 3 + // element 1 / 4 4 + // + // incoming R/D : 0, 0 -> add values from depth 0 to 3 (def level 0 always maps to depth 0) + // incoming R/D : 0, 1 -> add values from depth 0 to 3 + // incoming R/D : 0, 2 -> add values from depth 0 to 3 + // incoming R/D : 1, 4 -> add values from depth 4 to 4 + // + // Note : the -validity- of values is simply checked by comparing the incoming D value against the + // D value of the given nesting level (incoming D >= the D for the nesting level == valid, + // otherwise NULL). The tricky part is determining what nesting levels to add values at. + // + // For schemas with no repetition level (no lists), X is always 0 and Y is always max nesting + // depth. + // + + // compute "X" from above + for (int s_idx = schema.max_repetition_level; s_idx >= 0; s_idx--) { + auto find_shallowest = [&](int r) { + int shallowest = -1; + int cur_depth = max_depth - 1; + int schema_idx = src_col_schema; + while (schema_idx > 0) { + auto cur_schema = md.get_schema(schema_idx); + if (cur_schema.max_repetition_level == r) { + // if this is a repeated field, map it one level deeper + shallowest = cur_schema.is_stub() ? cur_depth + 1 : cur_depth; + } + // if it's one-level encoding list + else if (cur_schema.is_one_level_list()) { + shallowest = cur_depth - 1; + } + if (!cur_schema.is_stub()) { cur_depth--; } + schema_idx = cur_schema.parent_idx; + } + return shallowest; + }; + rep_depth_remap[s_idx] = find_shallowest(s_idx); + } + + // compute "Y" from above + for (int s_idx = schema.max_definition_level; s_idx >= 0; s_idx--) { + auto find_deepest = [&](int d) { + SchemaElement prev_schema; + int schema_idx = src_col_schema; + int r1 = 0; + while (schema_idx > 0) { + SchemaElement cur_schema = md.get_schema(schema_idx); + if (cur_schema.max_definition_level == d) { + // if this is a repeated field, map it one level deeper + r1 = cur_schema.is_stub() ? 
prev_schema.max_repetition_level + : cur_schema.max_repetition_level; + break; + } + prev_schema = cur_schema; + schema_idx = cur_schema.parent_idx; + } + + // we now know R1 from above. return the deepest nesting level that has the + // same repetition level + schema_idx = src_col_schema; + int depth = max_depth - 1; + while (schema_idx > 0) { + SchemaElement cur_schema = md.get_schema(schema_idx); + if (cur_schema.max_repetition_level == r1) { + // if this is a repeated field, map it one level deeper + depth = cur_schema.is_stub() ? depth + 1 : depth; + break; + } + if (!cur_schema.is_stub()) { depth--; } + prev_schema = cur_schema; + schema_idx = cur_schema.parent_idx; + } + return depth; + }; + def_depth_remap[s_idx] = find_deepest(s_idx); + } +} + +/** + * @brief Function that returns the required the number of bits to store a value + */ +template +T required_bits(uint32_t max_level) +{ + return static_cast(CompactProtocolReader::NumRequiredBits(max_level)); +} + +/** + * @brief Converts cuDF units to Parquet units. + * + * @return A tuple of Parquet type width, Parquet clock rate and Parquet decimal type. + */ +std::tuple conversion_info(type_id column_type_id, + type_id timestamp_type_id, + parquet::Type physical, + int8_t converted, + int32_t length) +{ + int32_t type_width = (physical == parquet::FIXED_LEN_BYTE_ARRAY) ? length : 0; + int32_t clock_rate = 0; + if (column_type_id == type_id::INT8 or column_type_id == type_id::UINT8) { + type_width = 1; // I32 -> I8 + } else if (column_type_id == type_id::INT16 or column_type_id == type_id::UINT16) { + type_width = 2; // I32 -> I16 + } else if (column_type_id == type_id::INT32) { + type_width = 4; // str -> hash32 + } else if (is_chrono(data_type{column_type_id})) { + clock_rate = to_clockrate(timestamp_type_id); + } + + int8_t converted_type = converted; + if (converted_type == parquet::DECIMAL && column_type_id != type_id::FLOAT64 && + not cudf::is_fixed_point(data_type{column_type_id})) { + converted_type = parquet::UNKNOWN; // Not converting to float64 or decimal + } + return std::make_tuple(type_width, clock_rate, converted_type); +} + +/** + * TODO: Rename this into something more meaningful + * + * @brief Reads compressed page data to device memory + * + * @param page_data Buffers to hold compressed page data for each chunk + * @param chunks List of column chunk descriptors + * @param begin_chunk Index of first column chunk to read + * @param end_chunk Index after the last column chunk to read + * @param column_chunk_offsets File offset for all chunks + * + */ +std::future read_column_chunks_async( + std::vector> const& sources, + std::vector>& page_data, + hostdevice_vector& chunks, // TODO const? 
+ size_t begin_chunk, + size_t end_chunk, + const std::vector& column_chunk_offsets, + std::vector const& chunk_source_map, + rmm::cuda_stream_view stream) +{ + // Transfer chunk data, coalescing adjacent chunks + std::vector> read_tasks; + for (size_t chunk = begin_chunk; chunk < end_chunk;) { + const size_t io_offset = column_chunk_offsets[chunk]; + size_t io_size = chunks[chunk].compressed_size; + size_t next_chunk = chunk + 1; + const bool is_compressed = (chunks[chunk].codec != parquet::Compression::UNCOMPRESSED); + while (next_chunk < end_chunk) { + const size_t next_offset = column_chunk_offsets[next_chunk]; + const bool is_next_compressed = + (chunks[next_chunk].codec != parquet::Compression::UNCOMPRESSED); + if (next_offset != io_offset + io_size || is_next_compressed != is_compressed) { + // Can't merge if not contiguous or mixing compressed and uncompressed + // Not coalescing uncompressed with compressed chunks is so that compressed buffers can be + // freed earlier (immediately after decompression stage) to limit peak memory requirements + break; + } + io_size += chunks[next_chunk].compressed_size; + next_chunk++; + } + if (io_size != 0) { + auto& source = sources[chunk_source_map[chunk]]; + if (source->is_device_read_preferred(io_size)) { + auto buffer = rmm::device_buffer(io_size, stream); + auto fut_read_size = source->device_read_async( + io_offset, io_size, static_cast(buffer.data()), stream); + read_tasks.emplace_back(std::move(fut_read_size)); + page_data[chunk] = datasource::buffer::create(std::move(buffer)); + } else { + auto const buffer = source->host_read(io_offset, io_size); + page_data[chunk] = + datasource::buffer::create(rmm::device_buffer(buffer->data(), buffer->size(), stream)); + } + auto d_compdata = page_data[chunk]->data(); + do { + chunks[chunk].compressed_data = d_compdata; + d_compdata += chunks[chunk].compressed_size; + } while (++chunk != next_chunk); + } else { + chunk = next_chunk; + } + } + auto sync_fn = [](decltype(read_tasks) read_tasks) { + for (auto& task : read_tasks) { + task.wait(); + } + }; + return std::async(std::launch::deferred, sync_fn, std::move(read_tasks)); +} + +/** + * @brief Returns the number of total pages from the given column chunks + * + * @param chunks List of column chunk descriptors + * + * @return The total number of pages + */ +size_t count_page_headers(hostdevice_vector& chunks, + rmm::cuda_stream_view stream) +{ + size_t total_pages = 0; + + chunks.host_to_device(stream); + gpu::DecodePageHeaders(chunks.device_ptr(), chunks.size(), stream); + chunks.device_to_host(stream, true); + + for (size_t c = 0; c < chunks.size(); c++) { + total_pages += chunks[c].num_data_pages + chunks[c].num_dict_pages; + } + + return total_pages; +} + +/** + * @brief Returns the page information from the given column chunks. + * + * @param chunks List of column chunk descriptors + * @param pages List of page information + */ +void decode_page_headers(hostdevice_vector& chunks, + hostdevice_vector& pages, + rmm::cuda_stream_view stream) +{ + // IMPORTANT : if you change how pages are stored within a chunk (dist pages, then data pages), + // please update preprocess_nested_columns to reflect this. 
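The loop that follows assumes one flat pages array in which each chunk owns a contiguous slice: its dictionary pages first, then its data pages. A small worked example with made-up page counts:

    // chunk 0: 1 dict + 3 data pages, chunk 1: 0 dict + 2 data pages
    int const max_num_pages[2] = {1 + 3, 0 + 2};
    int page_starts[2];
    int page_count = 0;
    for (int c = 0; c < 2; ++c) {
      page_starts[c] = page_count;  // chunk c's page_info points at pages[page_starts[c]]
      page_count += max_num_pages[c];
    }
    // page_starts == {0, 4}: pages[0..3] belong to chunk 0, pages[4..5] to chunk 1
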
+ for (size_t c = 0, page_count = 0; c < chunks.size(); c++) { + chunks[c].max_num_pages = chunks[c].num_data_pages + chunks[c].num_dict_pages; + chunks[c].page_info = pages.device_ptr(page_count); + page_count += chunks[c].max_num_pages; + } + + chunks.host_to_device(stream); + gpu::DecodePageHeaders(chunks.device_ptr(), chunks.size(), stream); + pages.device_to_host(stream, true); +} + +/** + * @brief Decompresses the page data, at page granularity. + * + * @param chunks List of column chunk descriptors + * @param pages List of page information + * + * @return Device buffer to decompressed page data + */ +rmm::device_buffer decompress_page_data(hostdevice_vector& chunks, + hostdevice_vector& pages, + rmm::cuda_stream_view stream) +{ + auto for_each_codec_page = [&](parquet::Compression codec, const std::function& f) { + for (size_t c = 0, page_count = 0; c < chunks.size(); c++) { + const auto page_stride = chunks[c].max_num_pages; + if (chunks[c].codec == codec) { + for (int k = 0; k < page_stride; k++) { + f(page_count + k); + } + } + page_count += page_stride; + } + }; + + // Brotli scratch memory for decompressing + rmm::device_buffer debrotli_scratch; + + // Count the exact number of compressed pages + size_t num_comp_pages = 0; + size_t total_decomp_size = 0; + + struct codec_stats { + parquet::Compression compression_type = UNCOMPRESSED; + size_t num_pages = 0; + int32_t max_decompressed_size = 0; + size_t total_decomp_size = 0; + }; + + std::array codecs{codec_stats{parquet::GZIP}, + codec_stats{parquet::SNAPPY}, + codec_stats{parquet::BROTLI}, + codec_stats{parquet::ZSTD}}; + + auto is_codec_supported = [&codecs](int8_t codec) { + if (codec == parquet::UNCOMPRESSED) return true; + return std::find_if(codecs.begin(), codecs.end(), [codec](auto& cstats) { + return codec == cstats.compression_type; + }) != codecs.end(); + }; + CUDF_EXPECTS(std::all_of(chunks.begin(), + chunks.end(), + [&is_codec_supported](auto const& chunk) { + return is_codec_supported(chunk.codec); + }), + "Unsupported compression type"); + + for (auto& codec : codecs) { + for_each_codec_page(codec.compression_type, [&](size_t page) { + auto page_uncomp_size = pages[page].uncompressed_page_size; + total_decomp_size += page_uncomp_size; + codec.total_decomp_size += page_uncomp_size; + codec.max_decompressed_size = std::max(codec.max_decompressed_size, page_uncomp_size); + codec.num_pages++; + num_comp_pages++; + }); + if (codec.compression_type == parquet::BROTLI && codec.num_pages > 0) { + debrotli_scratch.resize(get_gpu_debrotli_scratch_size(codec.num_pages), stream); + } + } + + // Dispatch batches of pages to decompress for each codec + rmm::device_buffer decomp_pages(total_decomp_size, stream); + + std::vector> comp_in; + comp_in.reserve(num_comp_pages); + std::vector> comp_out; + comp_out.reserve(num_comp_pages); + + // vectors to save v2 def and rep level data, if any + std::vector> copy_in; + copy_in.reserve(num_comp_pages); + std::vector> copy_out; + copy_out.reserve(num_comp_pages); + + rmm::device_uvector comp_res(num_comp_pages, stream); + thrust::fill(rmm::exec_policy(stream), + comp_res.begin(), + comp_res.end(), + compression_result{0, compression_status::FAILURE}); + + size_t decomp_offset = 0; + int32_t start_pos = 0; + for (const auto& codec : codecs) { + if (codec.num_pages == 0) { continue; } + + for_each_codec_page(codec.compression_type, [&](size_t page_idx) { + auto const dst_base = static_cast(decomp_pages.data()) + decomp_offset; + auto& page = pages[page_idx]; + // offset will only be 
non-zero for V2 pages + auto const offset = page.def_lvl_bytes + page.rep_lvl_bytes; + // for V2 need to copy def and rep level info into place, and then offset the + // input and output buffers. otherwise we'd have to keep both the compressed + // and decompressed data. + if (offset != 0) { + copy_in.emplace_back(page.page_data, offset); + copy_out.emplace_back(dst_base, offset); + } + comp_in.emplace_back(page.page_data + offset, + static_cast(page.compressed_page_size - offset)); + comp_out.emplace_back(dst_base + offset, + static_cast(page.uncompressed_page_size - offset)); + page.page_data = dst_base; + decomp_offset += page.uncompressed_page_size; + }); + + host_span const> comp_in_view{comp_in.data() + start_pos, + codec.num_pages}; + auto const d_comp_in = cudf::detail::make_device_uvector_async(comp_in_view, stream); + host_span const> comp_out_view(comp_out.data() + start_pos, + codec.num_pages); + auto const d_comp_out = cudf::detail::make_device_uvector_async(comp_out_view, stream); + device_span d_comp_res_view(comp_res.data() + start_pos, codec.num_pages); + + switch (codec.compression_type) { + case parquet::GZIP: + gpuinflate(d_comp_in, d_comp_out, d_comp_res_view, gzip_header_included::YES, stream); + break; + case parquet::SNAPPY: + if (nvcomp_integration::is_stable_enabled()) { + nvcomp::batched_decompress(nvcomp::compression_type::SNAPPY, + d_comp_in, + d_comp_out, + d_comp_res_view, + codec.max_decompressed_size, + codec.total_decomp_size, + stream); + } else { + gpu_unsnap(d_comp_in, d_comp_out, d_comp_res_view, stream); + } + break; + case parquet::ZSTD: + nvcomp::batched_decompress(nvcomp::compression_type::ZSTD, + d_comp_in, + d_comp_out, + d_comp_res_view, + codec.max_decompressed_size, + codec.total_decomp_size, + stream); + break; + case parquet::BROTLI: + gpu_debrotli(d_comp_in, + d_comp_out, + d_comp_res_view, + debrotli_scratch.data(), + debrotli_scratch.size(), + stream); + break; + default: CUDF_FAIL("Unexpected decompression dispatch"); break; + } + start_pos += codec.num_pages; + } + + CUDF_EXPECTS(thrust::all_of(rmm::exec_policy(stream), + comp_res.begin(), + comp_res.end(), + [] __device__(auto const& res) { + return res.status == compression_status::SUCCESS; + }), + "Error during decompression"); + + // now copy the uncompressed V2 def and rep level data + if (not copy_in.empty()) { + auto const d_copy_in = cudf::detail::make_device_uvector_async(copy_in, stream); + auto const d_copy_out = cudf::detail::make_device_uvector_async(copy_out, stream); + + gpu_copy_uncompressed_blocks(d_copy_in, d_copy_out, stream); + stream.synchronize(); + } + + // Update the page information in device memory with the updated value of + // page_data; it now points to the uncompressed data buffer + pages.host_to_device(stream); + + return decomp_pages; +} + +} // namespace + +void reader::impl::allocate_nesting_info(hostdevice_vector const& chunks, + hostdevice_vector& pages, + hostdevice_vector& page_nesting_info) +{ + // compute total # of page_nesting infos needed and allocate space. 
doing this in one + // buffer to keep it to a single gpu allocation + size_t const total_page_nesting_infos = std::accumulate( + chunks.host_ptr(), chunks.host_ptr() + chunks.size(), 0, [&](int total, auto& chunk) { + // the schema of the input column + auto const& schema = _metadata->get_schema(chunk.src_col_schema); + auto const per_page_nesting_info_size = max( + schema.max_definition_level + 1, _metadata->get_output_nesting_depth(chunk.src_col_schema)); + return total + (per_page_nesting_info_size * chunk.num_data_pages); + }); + + page_nesting_info = hostdevice_vector{total_page_nesting_infos, _stream}; + + // retrieve from the gpu so we can update + pages.device_to_host(_stream, true); + + // update pointers in the PageInfos + int target_page_index = 0; + int src_info_index = 0; + for (size_t idx = 0; idx < chunks.size(); idx++) { + int src_col_schema = chunks[idx].src_col_schema; + auto& schema = _metadata->get_schema(src_col_schema); + auto const per_page_nesting_info_size = std::max( + schema.max_definition_level + 1, _metadata->get_output_nesting_depth(src_col_schema)); + auto const type_id = to_type_id(schema, _strings_to_categorical, _timestamp_type.id()); + + // skip my dict pages + target_page_index += chunks[idx].num_dict_pages; + for (int p_idx = 0; p_idx < chunks[idx].num_data_pages; p_idx++) { + pages[target_page_index + p_idx].nesting = page_nesting_info.device_ptr() + src_info_index; + pages[target_page_index + p_idx].num_nesting_levels = per_page_nesting_info_size; + + // this isn't the ideal place to be setting this value (it's not obvious this function would + // do it) but we don't have any other places that go host->device with the pages and I'd like + // to avoid another copy + pages[target_page_index + p_idx].type = type_id; + + src_info_index += per_page_nesting_info_size; + } + target_page_index += chunks[idx].num_data_pages; + } + + // copy back to the gpu + pages.host_to_device(_stream); + + // fill in + int nesting_info_index = 0; + std::map, std::vector>> depth_remapping; + for (size_t idx = 0; idx < chunks.size(); idx++) { + int src_col_schema = chunks[idx].src_col_schema; + + // schema of the input column + auto& schema = _metadata->get_schema(src_col_schema); + // real depth of the output cudf column hierarchy (1 == no nesting, 2 == 1 level, etc) + int max_depth = _metadata->get_output_nesting_depth(src_col_schema); + + // # of nesting infos stored per page for this column + auto const per_page_nesting_info_size = std::max(schema.max_definition_level + 1, max_depth); + + // if this column has lists, generate depth remapping + std::map, std::vector>> depth_remapping; + if (schema.max_repetition_level > 0) { + generate_depth_remappings(depth_remapping, src_col_schema, *_metadata); + } + + // fill in host-side nesting info + int schema_idx = src_col_schema; + auto cur_schema = _metadata->get_schema(schema_idx); + int cur_depth = max_depth - 1; + while (schema_idx > 0) { + // stub columns (basically the inner field of a list scheme element) are not real columns. 
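      // (Illustration, not from this patch: in the usual 3-level Parquet list encoding
      //    optional group my_list (LIST) { repeated group list { optional int32 element; } }
      //  the repeated "list" group is such a stub; only my_list and element contribute
      //  levels to the output column hierarchy.)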
+ // we can ignore them for the purposes of output nesting info + if (!cur_schema.is_stub()) { + // initialize each page within the chunk + for (int p_idx = 0; p_idx < chunks[idx].num_data_pages; p_idx++) { + gpu::PageNestingInfo* pni = + &page_nesting_info[nesting_info_index + (p_idx * per_page_nesting_info_size)]; + + // if we have lists, set our start and end depth remappings + if (schema.max_repetition_level > 0) { + auto remap = depth_remapping.find(src_col_schema); + CUDF_EXPECTS(remap != depth_remapping.end(), + "Could not find depth remapping for schema"); + std::vector const& rep_depth_remap = (remap->second.first); + std::vector const& def_depth_remap = (remap->second.second); + + for (size_t m = 0; m < rep_depth_remap.size(); m++) { + pni[m].start_depth = rep_depth_remap[m]; + } + for (size_t m = 0; m < def_depth_remap.size(); m++) { + pni[m].end_depth = def_depth_remap[m]; + } + } + + // values indexed by output column index + pni[cur_depth].max_def_level = cur_schema.max_definition_level; + pni[cur_depth].max_rep_level = cur_schema.max_repetition_level; + pni[cur_depth].size = 0; + } + + // move up the hierarchy + cur_depth--; + } + + // next schema + schema_idx = cur_schema.parent_idx; + cur_schema = _metadata->get_schema(schema_idx); + } + + nesting_info_index += (per_page_nesting_info_size * chunks[idx].num_data_pages); + } + + // copy nesting info to the device + page_nesting_info.host_to_device(_stream); +} + +void reader::impl::load_and_decompress_data(std::vector const& row_groups_info, + size_type num_rows) +{ + // This function should never be called if `num_rows == 0`. + CUDF_EXPECTS(num_rows > 0, "Number of reading rows must not be zero."); + + // Descriptors for all the chunks that make up the selected columns + const auto num_input_columns = _input_columns.size(); + const auto num_chunks = row_groups_info.size() * num_input_columns; + _file_itm_data.chunks = hostdevice_vector(0, num_chunks, _stream); + + // Association between each column chunk and its source + std::vector chunk_source_map(num_chunks); + + // Tracker for eventually deallocating compressed and uncompressed data + _file_itm_data.raw_page_data = std::vector>(num_chunks); + + // Keep track of column chunk file offsets + std::vector column_chunk_offsets(num_chunks); + + // Initialize column chunk information + size_t total_decompressed_size = 0; + auto remaining_rows = num_rows; + std::vector> read_rowgroup_tasks; + for (const auto& rg : row_groups_info) { + const auto& row_group = _metadata->get_row_group(rg.index, rg.source_index); + auto const row_group_start = rg.start_row; + auto const row_group_source = rg.source_index; + auto const row_group_rows = std::min(remaining_rows, row_group.num_rows); + auto const io_chunk_idx = _file_itm_data.chunks.size(); + + // generate ColumnChunkDesc objects for everything to be decoded (all input columns) + for (size_t i = 0; i < num_input_columns; ++i) { + auto col = _input_columns[i]; + // look up metadata + auto& col_meta = _metadata->get_column_metadata(rg.index, rg.source_index, col.schema_idx); + auto& schema = _metadata->get_schema(col.schema_idx); + + auto [type_width, clock_rate, converted_type] = + conversion_info(to_type_id(schema, _strings_to_categorical, _timestamp_type.id()), + _timestamp_type.id(), + schema.type, + schema.converted_type, + schema.type_length); + + column_chunk_offsets[_file_itm_data.chunks.size()] = + (col_meta.dictionary_page_offset != 0) + ? 
std::min(col_meta.data_page_offset, col_meta.dictionary_page_offset) + : col_meta.data_page_offset; + + _file_itm_data.chunks.push_back( + gpu::ColumnChunkDesc(col_meta.total_compressed_size, + nullptr, + col_meta.num_values, + schema.type, + type_width, + row_group_start, + row_group_rows, + schema.max_definition_level, + schema.max_repetition_level, + _metadata->get_output_nesting_depth(col.schema_idx), + required_bits(schema.max_definition_level), + required_bits(schema.max_repetition_level), + col_meta.codec, + converted_type, + schema.logical_type, + schema.decimal_scale, + clock_rate, + i, + col.schema_idx)); + + // Map each column chunk to its column index and its source index + chunk_source_map[_file_itm_data.chunks.size() - 1] = row_group_source; + + if (col_meta.codec != Compression::UNCOMPRESSED) { + total_decompressed_size += col_meta.total_uncompressed_size; + } + } + // Read compressed chunk data to device memory + read_rowgroup_tasks.push_back(read_column_chunks_async(_sources, + _file_itm_data.raw_page_data, + _file_itm_data.chunks, + io_chunk_idx, + _file_itm_data.chunks.size(), + column_chunk_offsets, + chunk_source_map, + _stream)); + + remaining_rows -= row_group.num_rows; + } + for (auto& task : read_rowgroup_tasks) { + task.wait(); + } + assert(remaining_rows <= 0); + + // Process dataset chunk pages into output columns + const auto total_pages = count_page_headers(_file_itm_data.chunks, _stream); + _file_itm_data.pages_info = hostdevice_vector(total_pages, total_pages, _stream); + + if (total_pages > 0) { + // decoding of column/page information + decode_page_headers(_file_itm_data.chunks, _file_itm_data.pages_info, _stream); + if (total_decompressed_size > 0) { + _file_itm_data.decomp_page_data = + decompress_page_data(_file_itm_data.chunks, _file_itm_data.pages_info, _stream); + // Free compressed data + for (size_t c = 0; c < _file_itm_data.chunks.size(); c++) { + if (_file_itm_data.chunks[c].codec != parquet::Compression::UNCOMPRESSED) { + _file_itm_data.raw_page_data[c].reset(); + // TODO: Check if this is called + } + } + } + + // build output column info + // walk the schema, building out_buffers that mirror what our final cudf columns will look + // like. important : there is not necessarily a 1:1 mapping between input columns and output + // columns. For example, parquet does not explicitly store a ColumnChunkDesc for struct + // columns. The "structiness" is simply implied by the schema. For example, this schema: + // required group field_id=1 name { + // required binary field_id=2 firstname (String); + // required binary field_id=3 middlename (String); + // required binary field_id=4 lastname (String); + // } + // will only contain 3 columns of data (firstname, middlename, lastname). But of course + // "name" is a struct column that we want to return, so we have to make sure that we + // create it ourselves. 
+ // std::vector output_info = build_output_column_info(); + + // nesting information (sizes, etc) stored -per page- + // note : even for flat schemas, we allocate 1 level of "nesting" info + + allocate_nesting_info( + _file_itm_data.chunks, _file_itm_data.pages_info, _file_itm_data.page_nesting_info); + } +} + +namespace { + #if defined(PREPROCESS_DEBUG) void print_pages(hostdevice_vector& pages, rmm::cuda_stream_view _stream) { @@ -355,21 +1087,6 @@ struct get_page_schema { __device__ size_type operator()(gpu::PageInfo const& page) { return page.src_col_schema; } }; -struct get_page_nesting_size { - size_type const src_col_schema; - size_type const depth; - gpu::PageInfo const* const pages; - - __device__ size_type operator()(int index) - { - auto const& page = pages[index]; - if (page.src_col_schema != src_col_schema || page.flags & gpu::PAGEINFO_FLAGS_DICTIONARY) { - return 0; - } - return page.nesting[depth].size; - } -}; - struct chunk_row_output_iter { gpu::PageInfo* p; using value_type = size_type; @@ -390,50 +1107,6 @@ struct chunk_row_output_iter { // __device__ void operator=(value_type v) { p->chunk_row = v; } }; -struct start_offset_output_iterator { - gpu::PageInfo* pages; - int const* page_indices; - int cur_index; - int src_col_schema; - int nesting_depth; - int empty = 0; - using value_type = size_type; - using difference_type = size_type; - using pointer = size_type*; - using reference = size_type&; - using iterator_category = thrust::output_device_iterator_tag; - - __host__ __device__ void operator=(start_offset_output_iterator const& other) - { - pages = other.pages; - page_indices = other.page_indices; - cur_index = other.cur_index; - src_col_schema = other.src_col_schema; - nesting_depth = other.nesting_depth; - } - - __host__ __device__ start_offset_output_iterator operator+(int i) - { - return start_offset_output_iterator{ - pages, page_indices, cur_index + i, src_col_schema, nesting_depth}; - } - - __host__ __device__ void operator++() { cur_index++; } - - __device__ reference operator[](int i) { return dereference(cur_index + i); } - __device__ reference operator*() { return dereference(cur_index); } - - private: - __device__ reference dereference(int index) - { - gpu::PageInfo const& p = pages[page_indices[index]]; - if (p.src_col_schema != src_col_schema || p.flags & gpu::PAGEINFO_FLAGS_DICTIONARY) { - return empty; - } - return p.nesting[nesting_depth].page_start_value; - } -}; - } // anonymous namespace void reader::impl::preprocess_columns(hostdevice_vector& chunks, @@ -450,7 +1123,7 @@ void reader::impl::preprocess_columns(hostdevice_vector& c auto const& input_col = _input_columns[idx]; size_t const max_depth = input_col.nesting_depth(); - auto* cols = &_output_columns; + auto* cols = &_output_buffers; for (size_t l_idx = 0; l_idx < max_depth; l_idx++) { auto& out_buf = (*cols)[input_col.nesting[l_idx]]; cols = &out_buf.children; @@ -588,305 +1261,4 @@ void reader::impl::preprocess_columns(hostdevice_vector& c : std::vector{{min_row, num_rows}}; } -void reader::impl::allocate_columns(hostdevice_vector& chunks, - hostdevice_vector& pages, - gpu::chunk_intermediate_data const& id, - size_t min_row, - size_t num_rows, - bool uses_custom_row_bounds) -{ - // computes: - // PageNestingInfo::size for each level of nesting, for each page, taking row bounds into account. - // PageInfo::skipped_values, which tells us where to start decoding in the input to respect the - // user bounds. 
- // It is only necessary to do this second pass if uses_custom_row_bounds is set (if the user has - // specified artifical bounds). - if (uses_custom_row_bounds) { - gpu::ComputePageSizes(pages, - chunks, - min_row, - num_rows, - false, // num_rows is already computed - false, // no need to compute string sizes - _stream); - // print_pages(pages, _stream); - } - - // iterate over all input columns and allocate any associated output - // buffers if they are not part of a list hierarchy. mark down - // if we have any list columns that need further processing. - bool has_lists = false; - for (size_t idx = 0; idx < _input_columns.size(); idx++) { - auto const& input_col = _input_columns[idx]; - size_t const max_depth = input_col.nesting_depth(); - - auto* cols = &_output_columns; - for (size_t l_idx = 0; l_idx < max_depth; l_idx++) { - auto& out_buf = (*cols)[input_col.nesting[l_idx]]; - cols = &out_buf.children; - - // if this has a list parent, we will have to do further work in gpu::PreprocessColumnData - // to know how big this buffer actually is. - if (out_buf.user_data & PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT) { - has_lists = true; - } - // if we haven't already processed this column because it is part of a struct hierarchy - else if (out_buf.size == 0) { - // add 1 for the offset if this is a list column - out_buf.create( - out_buf.type.id() == type_id::LIST && l_idx < max_depth ? num_rows + 1 : num_rows, - _stream, - _mr); - } - } - } - - // compute output column sizes by examining the pages of the -input- columns - if (has_lists) { - auto& page_keys = _chunk_itm_data.page_keys; - auto& page_index = _chunk_itm_data.page_index; - for (size_t idx = 0; idx < _input_columns.size(); idx++) { - auto const& input_col = _input_columns[idx]; - auto src_col_schema = input_col.schema_idx; - size_t max_depth = input_col.nesting_depth(); - - auto* cols = &_output_columns; - for (size_t l_idx = 0; l_idx < input_col.nesting_depth(); l_idx++) { - auto& out_buf = (*cols)[input_col.nesting[l_idx]]; - cols = &out_buf.children; - - // size iterator. indexes pages by sorted order - auto size_input = thrust::make_transform_iterator( - page_index.begin(), - get_page_nesting_size{src_col_schema, static_cast(l_idx), pages.device_ptr()}); - - // if this buffer is part of a list hierarchy, we need to determine it's - // final size and allocate it here. - // - // for struct columns, higher levels of the output columns are shared between input - // columns. so don't compute any given level more than once. 
- if ((out_buf.user_data & PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT) && out_buf.size == 0) { - int size = - thrust::reduce(rmm::exec_policy(_stream), size_input, size_input + pages.size()); - - // if this is a list column add 1 for non-leaf levels for the terminating offset - if (out_buf.type.id() == type_id::LIST && l_idx < max_depth) { size++; } - - // allocate - out_buf.create(size, _stream, _mr); - } - - // for nested hierarchies, compute per-page start offset - if (input_col.has_repetition) { - thrust::exclusive_scan_by_key( - rmm::exec_policy(_stream), - page_keys.begin(), - page_keys.end(), - size_input, - start_offset_output_iterator{pages.device_ptr(), - page_index.begin(), - 0, - static_cast(src_col_schema), - static_cast(l_idx)}); - } - } - } - } -} - -namespace { -/** - * @brief Function that returns the required the number of bits to store a value - */ -template -T required_bits(uint32_t max_level) -{ - return static_cast(CompactProtocolReader::NumRequiredBits(max_level)); -} - -/** - * @brief Converts cuDF units to Parquet units. - * - * @return A tuple of Parquet type width, Parquet clock rate and Parquet decimal type. - */ -std::tuple conversion_info(type_id column_type_id, - type_id timestamp_type_id, - parquet::Type physical, - int8_t converted, - int32_t length) -{ - int32_t type_width = (physical == parquet::FIXED_LEN_BYTE_ARRAY) ? length : 0; - int32_t clock_rate = 0; - if (column_type_id == type_id::INT8 or column_type_id == type_id::UINT8) { - type_width = 1; // I32 -> I8 - } else if (column_type_id == type_id::INT16 or column_type_id == type_id::UINT16) { - type_width = 2; // I32 -> I16 - } else if (column_type_id == type_id::INT32) { - type_width = 4; // str -> hash32 - } else if (is_chrono(data_type{column_type_id})) { - clock_rate = to_clockrate(timestamp_type_id); - } - - int8_t converted_type = converted; - if (converted_type == parquet::DECIMAL && column_type_id != type_id::FLOAT64 && - not cudf::is_fixed_point(data_type{column_type_id})) { - converted_type = parquet::UNKNOWN; // Not converting to float64 or decimal - } - return std::make_tuple(type_width, clock_rate, converted_type); -} - -} // namespace - -std::pair reader::impl::preprocess_file( - size_type skip_rows, - size_type num_rows, - const std::vector>& row_group_list) -{ - // printf("\n\n\n\npreprocess========================\n"); - - // Select only row groups required - // Note: `skip_rows` and `num_rows` will be modified in this function. - const auto selected_row_groups = - _metadata->select_row_groups(row_group_list, skip_rows, num_rows); - - // TODO: fix this - if (selected_row_groups.size() == 0 || _input_columns.size() == 0) { - return {skip_rows, num_rows}; - } - - // TODO: fix this. - // Need to check if the file actually has data. 
- _file_itm_data.has_data = true; - - // Descriptors for all the chunks that make up the selected columns - const auto num_input_columns = _input_columns.size(); - const auto num_chunks = selected_row_groups.size() * num_input_columns; - _file_itm_data.chunks = hostdevice_vector(0, num_chunks, _stream); - - // Association between each column chunk and its source - std::vector chunk_source_map(num_chunks); - - // Tracker for eventually deallocating compressed and uncompressed data - _file_itm_data.raw_page_data = std::vector>(num_chunks); - - // Keep track of column chunk file offsets - std::vector column_chunk_offsets(num_chunks); - - // Initialize column chunk information - size_t total_decompressed_size = 0; - auto remaining_rows = num_rows; - std::vector> read_rowgroup_tasks; - for (const auto& rg : selected_row_groups) { - const auto& row_group = _metadata->get_row_group(rg.index, rg.source_index); - auto const row_group_start = rg.start_row; - auto const row_group_source = rg.source_index; - auto const row_group_rows = std::min(remaining_rows, row_group.num_rows); - auto const io_chunk_idx = _file_itm_data.chunks.size(); - - // generate ColumnChunkDesc objects for everything to be decoded (all input columns) - for (size_t i = 0; i < num_input_columns; ++i) { - auto col = _input_columns[i]; - // look up metadata - auto& col_meta = _metadata->get_column_metadata(rg.index, rg.source_index, col.schema_idx); - auto& schema = _metadata->get_schema(col.schema_idx); - - auto [type_width, clock_rate, converted_type] = - conversion_info(to_type_id(schema, _strings_to_categorical, _timestamp_type.id()), - _timestamp_type.id(), - schema.type, - schema.converted_type, - schema.type_length); - - column_chunk_offsets[_file_itm_data.chunks.size()] = - (col_meta.dictionary_page_offset != 0) - ? 
std::min(col_meta.data_page_offset, col_meta.dictionary_page_offset) - : col_meta.data_page_offset; - - _file_itm_data.chunks.push_back( - gpu::ColumnChunkDesc(col_meta.total_compressed_size, - nullptr, - col_meta.num_values, - schema.type, - type_width, - row_group_start, - row_group_rows, - schema.max_definition_level, - schema.max_repetition_level, - _metadata->get_output_nesting_depth(col.schema_idx), - required_bits(schema.max_definition_level), - required_bits(schema.max_repetition_level), - col_meta.codec, - converted_type, - schema.logical_type, - schema.decimal_scale, - clock_rate, - i, - col.schema_idx)); - - // Map each column chunk to its column index and its source index - chunk_source_map[_file_itm_data.chunks.size() - 1] = row_group_source; - - if (col_meta.codec != Compression::UNCOMPRESSED) { - total_decompressed_size += col_meta.total_uncompressed_size; - } - } - // Read compressed chunk data to device memory - read_rowgroup_tasks.push_back(read_column_chunks(_file_itm_data.raw_page_data, - _file_itm_data.chunks, - io_chunk_idx, - _file_itm_data.chunks.size(), - column_chunk_offsets, - chunk_source_map)); - - remaining_rows -= row_group.num_rows; - } - for (auto& task : read_rowgroup_tasks) { - task.wait(); - } - assert(remaining_rows <= 0); - - // Process dataset chunk pages into output columns - const auto total_pages = count_page_headers(_file_itm_data.chunks); - _file_itm_data.pages_info = hostdevice_vector(total_pages, total_pages, _stream); - - if (total_pages > 0) { - // decoding of column/page information - decode_page_headers(_file_itm_data.chunks, _file_itm_data.pages_info); - if (total_decompressed_size > 0) { - _file_itm_data.decomp_page_data = - decompress_page_data(_file_itm_data.chunks, _file_itm_data.pages_info); - // Free compressed data - for (size_t c = 0; c < _file_itm_data.chunks.size(); c++) { - if (_file_itm_data.chunks[c].codec != parquet::Compression::UNCOMPRESSED) { - _file_itm_data.raw_page_data[c].reset(); - // TODO: Check if this is called - } - } - } - - // build output column info - // walk the schema, building out_buffers that mirror what our final cudf columns will look - // like. important : there is not necessarily a 1:1 mapping between input columns and output - // columns. For example, parquet does not explicitly store a ColumnChunkDesc for struct - // columns. The "structiness" is simply implied by the schema. For example, this schema: - // required group field_id=1 name { - // required binary field_id=2 firstname (String); - // required binary field_id=3 middlename (String); - // required binary field_id=4 lastname (String); - // } - // will only contain 3 columns of data (firstname, middlename, lastname). But of course - // "name" is a struct column that we want to return, so we have to make sure that we - // create it ourselves. - // std::vector output_info = build_output_column_info(); - - // nesting information (sizes, etc) stored -per page- - // note : even for flat schemas, we allocate 1 level of "nesting" info - - allocate_nesting_info( - _file_itm_data.chunks, _file_itm_data.pages_info, _file_itm_data.page_nesting_info); - } - - return {skip_rows, num_rows}; -} - } // namespace cudf::io::detail::parquet From c2bf7f5fc853659ccaa047a6ccc8936db49ce03a Mon Sep 17 00:00:00 2001 From: Dave Baranec Date: Thu, 27 Oct 2022 17:51:10 -0500 Subject: [PATCH 111/162] Fixed issues with list, and validity size calculations. 
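The validity-size change below switches bitmask accounting to 4 bytes per 32-row word
(the previous code divided the word count by 8, undercounting by a factor of 32), and the
list-size change sums page sizes across all nesting levels. A minimal standalone sketch,
cross-checking the byte counts quoted in the tests added by this patch; the helper name
and the hard-coded page contents are illustrative, not code from the patch:

#include <cstddef>
#include <cstdio>

// Bytes of validity bitmask for num_rows values: whole 32-bit words, 4 bytes each.
std::size_t validity_size(std::size_t num_rows, bool nullable)
{
  return nullable ? ((num_rows + 31) / 32) * 4 : 0;
}

int main()
{
  // One test page holds 20'000 list rows with 30'000 int32 leaf values.
  std::size_t const offsets = (20'000 + 1) * 4;       // 80'004 bytes
  std::size_t const leaves  = 30'000 * 4;             // 120'000 bytes
  std::printf("%zu\n", offsets + leaves);             // 200'004, as in the test comments

  // With nulls enabled, validity costs one 32-bit word per 32 rows:
  std::printf("%zu\n", validity_size(20'000, true));  // 2'500 bytes (625 words)
}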
--- cpp/src/io/parquet/parquet_gpu.hpp | 3 +- cpp/src/io/parquet/reader_impl.cu | 8 +- cpp/src/io/parquet/reader_impl_preprocess.cu | 26 +++--- cpp/tests/io/parquet_chunked_reader_test.cpp | 85 ++++++++++++++++++-- 4 files changed, 102 insertions(+), 20 deletions(-) diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index d344ad8e094..4b0dd760b79 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -107,6 +107,8 @@ struct PageNestingInfo { // set at initialization int32_t max_def_level; int32_t max_rep_level; + cudf::type_id type; // type of the corresponding cudf output column + bool nullable; // set during preprocessing int32_t size; // this page/nesting-level's row count contribution to the output column, if fully @@ -152,7 +154,6 @@ struct PageInfo { Encoding encoding; // Encoding for data or dictionary page Encoding definition_level_encoding; // Encoding used for definition levels (data page) Encoding repetition_level_encoding; // Encoding used for repetition levels (data page) - cudf::type_id type; // type of this page. // for nested types, we run a preprocess step in order to determine output // column sizes. Because of this, we can jump directly to the position in the diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 434ae3391bc..4a3cdeb2bb9 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -484,11 +484,6 @@ void reader::impl::allocate_nesting_info(hostdevice_vector pages[target_page_index + p_idx].nesting = page_nesting_info.device_ptr() + src_info_index; pages[target_page_index + p_idx].num_nesting_levels = per_page_nesting_info_size; - // this isn't the ideal place to be setting this value (it's not obvious this function would - // do it) but we don't have any other places that go host->device with the pages and I'd like - // to avoid another copy - pages[target_page_index + p_idx].type = type_id; - src_info_index += per_page_nesting_info_size; } target_page_index += chunks[idx].num_data_pages; @@ -550,6 +545,9 @@ void reader::impl::allocate_nesting_info(hostdevice_vector pni[cur_depth].max_def_level = cur_schema.max_definition_level; pni[cur_depth].max_rep_level = cur_schema.max_repetition_level; pni[cur_depth].size = 0; + pni[cur_depth].type = + to_type_id(cur_schema, _strings_to_categorical, _timestamp_type.id()); + pni[cur_depth].nullable = cur_schema.repetition_type == OPTIONAL; } // move up the hierarchy diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index 4d128ff910d..f89516726c8 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -91,7 +91,7 @@ struct cumulative_row_sum { struct row_size_functor { __device__ size_t validity_size(size_t num_rows, bool nullable) { - return nullable ? (cudf::util::div_rounding_up_safe(num_rows, size_t{32}) / 8) : 0; + return nullable ? 
(cudf::util::div_rounding_up_safe(num_rows, size_t{32}) * 4) : 0; } template @@ -133,14 +133,22 @@ struct get_cumulative_row_info { if (page.flags & gpu::PAGEINFO_FLAGS_DICTIONARY) { return cumulative_row_info{0, 0, page.src_col_schema}; } - size_t const row_count = page.nesting[0].size; - return cumulative_row_info{ - row_count, - // note: the size of the actual char bytes for strings is tracked in the `str_bytes` field, so - // the row_size_functor{} itself is only returning the size of offsets+validity - cudf::type_dispatcher(data_type{page.type}, row_size_functor{}, row_count, false) + - page.str_bytes, - page.src_col_schema}; + + // total nested size, not counting string data + auto iter = + cudf::detail::make_counting_transform_iterator(0, [page, index] __device__(size_type i) { + auto const& pni = page.nesting[i]; + if (index == 1) { + auto const size = + cudf::type_dispatcher(data_type{pni.type}, row_size_functor{}, pni.size, pni.nullable); + } + return cudf::type_dispatcher( + data_type{pni.type}, row_size_functor{}, pni.size, pni.nullable); + }); + + size_t const row_count = static_cast(page.nesting[0].size); + return {row_count, + thrust::reduce(thrust::seq, iter, iter + page.num_nesting_levels) + page.str_bytes}; } }; diff --git a/cpp/tests/io/parquet_chunked_reader_test.cpp b/cpp/tests/io/parquet_chunked_reader_test.cpp index 9c2ad89754f..5aeae39ad9a 100644 --- a/cpp/tests/io/parquet_chunked_reader_test.cpp +++ b/cpp/tests/io/parquet_chunked_reader_test.cpp @@ -336,11 +336,14 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithLists) auto const do_test = [num_rows](std::size_t chunk_read_limit, bool nullable) { std::vector> input_columns; - auto const int_iter = thrust::make_counting_iterator(0); - input_columns.emplace_back(int32s_col(int_iter, int_iter + num_rows).release()); - + // 20000 rows in 1 page consist of: + // + // 20001 offsets : 80004 bytes + // 30000 ints : 120000 bytes + // total : 200004 bytes auto const template_lists = int32s_lists_col{ int32s_lists_col{}, int32s_lists_col{0}, int32s_lists_col{0, 1}, int32s_lists_col{0, 1, 2}}; + auto const gather_iter = cudf::detail::make_counting_transform_iterator(0, [&](int32_t i) { return i % 4; }); auto const gather_map = int32s_col(gather_iter, gather_iter + num_rows); @@ -357,14 +360,86 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithLists) return std::tuple{std::move(input_table), std::move(result), num_chunks}; }; + // chunk size slightly less than 1 page (forcing it to be at least 1 page per read) + { + auto const [input, result, num_chunks] = do_test(200'000, false); + EXPECT_EQ(num_chunks, 5); + CUDF_TEST_EXPECT_TABLES_EQUAL(*input, *result); + } + + // chunk size exactly 1 page { - auto const [input, result, num_chunks] = do_test(400'000, false); + auto const [input, result, num_chunks] = do_test(200'004, true); + EXPECT_EQ(num_chunks, 5); + CUDF_TEST_EXPECT_TABLES_EQUAL(*input, *result); + } + + // chunk size 2 pages. 
3 chunks (2 pages + 2 pages + 1 page) + { + auto const [input, result, num_chunks] = do_test(400'008, true); EXPECT_EQ(num_chunks, 3); CUDF_TEST_EXPECT_TABLES_EQUAL(*input, *result); } +} + +TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithListsNulls) +{ + auto constexpr num_rows = 100'000; + // auto constexpr num_rows = 100; + + auto const do_test = [num_rows](std::size_t chunk_read_limit, bool nullable) { + std::vector> input_columns; + // 20000 rows in 1 page consist of: + // + // 625 validity words : 2500 bytes + // 20001 offsets : 80004 bytes + // 15000 ints : 60000 bytes + // total : 142504 bytes + auto const template_lists = int32s_lists_col{// these will all be null + int32s_lists_col{}, + int32s_lists_col{0}, + int32s_lists_col{1, 2}, + int32s_lists_col{3, 4, 5}}; + auto const gather_iter = + cudf::detail::make_counting_transform_iterator(0, [&](int32_t i) { return i % 4; }); + auto const gather_map = int32s_col(gather_iter, gather_iter + num_rows); + auto intermediate = + std::move(cudf::gather(cudf::table_view{{template_lists}}, gather_map)->release().front()); + auto const valids = cudf::detail::make_counting_transform_iterator( + 0, [](cudf::size_type i) { return i % 4 == 3 ? 0 : 1; }); + intermediate->set_null_mask(cudf::test::detail::make_null_mask(valids, valids + num_rows), + num_rows / 4); + + input_columns.emplace_back( + cudf::purge_nonempty_nulls(cudf::lists_column_view{intermediate->view()})); + + auto [input_table, filepath] = write_file(input_columns, + "chunked_read_with_lists_nulls", + nullable, + 512 * 1024, // 512KB per page + 20000 // 20k rows per page + ); + auto [result, num_chunks] = chunked_read(filepath, chunk_read_limit); + return std::tuple{std::move(input_table), std::move(result), num_chunks}; + }; + + // chunk size slightly less than 1 page (forcing it to be at least 1 page per read) + { + auto const [input, result, num_chunks] = do_test(142'500, false); + EXPECT_EQ(num_chunks, 5); + CUDF_TEST_EXPECT_TABLES_EQUAL(*input, *result); + } + + // chunk size exactly 1 page + { + auto const [input, result, num_chunks] = do_test(142'504, false); + EXPECT_EQ(num_chunks, 5); + CUDF_TEST_EXPECT_TABLES_EQUAL(*input, *result); + } + // chunk size 2 pages. 
3 chunks (2 pages + 2 pages + 1 page) { - auto const [input, result, num_chunks] = do_test(400'000, true); + auto const [input, result, num_chunks] = do_test(285'008, false); EXPECT_EQ(num_chunks, 3); CUDF_TEST_EXPECT_TABLES_EQUAL(*input, *result); } From e7e74c5f329d281ac4882a2cd534dc7758331221 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 27 Oct 2022 15:56:36 -0700 Subject: [PATCH 112/162] More refactoring Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.cu | 30 ++--- cpp/src/io/parquet/reader_impl.hpp | 115 +++++++++---------- cpp/src/io/parquet/reader_impl_helpers.cu | 10 +- cpp/src/io/parquet/reader_impl_helpers.cuh | 4 +- cpp/src/io/parquet/reader_impl_preprocess.cu | 14 +-- 5 files changed, 85 insertions(+), 88 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 744f7dcc26b..83f15b2659d 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -114,7 +114,7 @@ void copy_output_buffer(column_buffer const& buff, column_buffer& new_buff) void reader::impl::allocate_columns(hostdevice_vector& chunks, hostdevice_vector& pages, gpu::chunk_intermediate_data const& id, - size_t min_row, + size_t skip_rows, size_t num_rows, bool uses_custom_row_bounds) { @@ -127,7 +127,7 @@ void reader::impl::allocate_columns(hostdevice_vector& chu if (uses_custom_row_bounds) { gpu::ComputePageSizes(pages, chunks, - min_row, + skip_rows, num_rows, false, // num_rows is already computed false, // no need to compute string sizes @@ -220,8 +220,8 @@ void reader::impl::allocate_columns(hostdevice_vector& chu void reader::impl::decode_page_data(hostdevice_vector& chunks, hostdevice_vector& pages, hostdevice_vector& page_nesting, - size_t min_row, - size_t total_rows) + size_t skip_rows, + size_t num_rows) { // TODO (dm): hd_vec should have begin and end iterator members size_t sum_max_depths = @@ -316,7 +316,7 @@ void reader::impl::decode_page_data(hostdevice_vector& chu chunk_nested_valids.host_to_device(_stream); chunk_nested_data.host_to_device(_stream); - gpu::DecodePageData(pages, chunks, total_rows, min_row, _stream); + gpu::DecodePageData(pages, chunks, num_rows, skip_rows, _stream); _stream.synchronize(); @@ -434,22 +434,22 @@ reader::impl::impl(std::size_t chunk_read_limit, void reader::impl::prepare_data(size_type skip_rows, size_type num_rows, bool uses_custom_row_bounds, - std::vector> const& row_group_list) + std::vector> const& row_group_indices) { if (_file_preprocessed) { return; } const auto [skip_rows_corrected, num_rows_corrected, row_groups_info] = - _metadata->select_row_groups(row_group_list, skip_rows, num_rows); + _metadata->select_row_groups(row_group_indices, skip_rows, num_rows); if (num_rows_corrected > 0 && row_groups_info.size() != 0 && _input_columns.size() != 0) { load_and_decompress_data(row_groups_info, num_rows_corrected); - preprocess_columns(_file_itm_data.chunks, - _file_itm_data.pages_info, - skip_rows_corrected, - num_rows_corrected, - uses_custom_row_bounds, - _chunk_read_limit); + compute_chunk_read_info(_file_itm_data.chunks, + _file_itm_data.pages_info, + skip_rows_corrected, + num_rows_corrected, + uses_custom_row_bounds, + _chunk_read_limit); if (_chunk_read_limit == 0) { // read the whole file at once CUDF_EXPECTS(_chunk_read_info.size() == 1, @@ -545,7 +545,7 @@ table_with_metadata reader::impl::finalize_output(table_metadata& out_metadata, table_with_metadata reader::impl::read(size_type skip_rows, size_type num_rows, bool uses_custom_row_bounds, - std::vector> 
const& row_group_list) + std::vector> const& row_group_indices) { #if defined(ALLOW_PLAIN_READ_CHUNK_LIMIT) prepare_data( @@ -553,7 +553,7 @@ table_with_metadata reader::impl::read(size_type skip_rows, return read_chunk_internal(uses_custom_row_bounds || _chunk_read_limit > 0); #else CUDF_EXPECTS(_chunk_read_limit == 0, "Reading the whole file must not have non-zero byte_limit."); - prepare_data(skip_rows, num_rows, uses_custom_row_bounds, row_group_list); + prepare_data(skip_rows, num_rows, uses_custom_row_bounds, row_group_indices); return read_chunk_internal(uses_custom_row_bounds); #endif } diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index b936c027ef1..bd22abe393c 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -70,8 +70,8 @@ class reader::impl { * * @param skip_rows Number of rows to skip from the start * @param num_rows Number of rows to read - * @param uses_custom_row_bounds Whether or not num_rows and min_rows represents user-specific - * bounds + * @param uses_custom_row_bounds Whether or not num_rows and skip_rows represents user-specific + * bounds * @param row_group_indices Lists of row groups to read, one per source * * @return The set of columns along with metadata @@ -114,25 +114,14 @@ class reader::impl { * * @param skip_rows Number of rows to skip from the start * @param num_rows Number of rows to read - * @param uses_custom_row_bounds Whether or not num_rows and min_rows represents user-specific + * @param uses_custom_row_bounds Whether or not num_rows and skip_rows represents user-specific * bounds * @param row_group_indices Lists of row groups to read, one per source */ void prepare_data(size_type skip_rows, size_type num_rows, bool uses_custom_row_bounds, - const std::vector>& row_group_list); - - /** - * @brief Read a chunk of data and return an output table. - * - * This function is called internally and expects all preprocessing steps have been done. - * - * @param uses_custom_row_bounds Whether or not num_rows and min_rows represents user-specific - * bounds - * @return The output table along with columns' metadata - */ - table_with_metadata read_chunk_internal(bool uses_custom_row_bounds); + const std::vector>& row_group_indices); /** * @brief Load and decompress the input file(s) into memory. @@ -141,19 +130,31 @@ class reader::impl { size_type num_rows); /** - * @brief Finalize the output table by adding empty columns for the non-selected columns in - * schema. + * @brief Compute the reading info (skip_rows, num_rows) for the output chunks. * - * @param out_metadata The output table metadata - * @param out_columns The columns for building the output table - * @return + * There are several pieces of information we can't compute directly from row counts in + * the parquet headers when dealing with nested schemas: + * - The total sizes of all output columns at all nesting levels + * - The starting output buffer offset for each page, for each nesting level + * + * For flat schemas, these values are computed during header decoding (see gpuDecodePageHeaders). 
+ * + * @param chunks All chunks to be decoded + * @param pages All pages to be decoded + * @param skip_rows Crop all rows below skip_rows + * @param num_rows Maximum number of rows to read + * @param uses_custom_row_bounds Whether or not num_rows and skip_rows represents user-specific + * bounds */ - table_with_metadata finalize_output(table_metadata& out_metadata, - std::vector>& out_columns); + void compute_chunk_read_info(hostdevice_vector& chunks, + hostdevice_vector& pages, + size_t skip_rows, + size_t num_rows, + bool uses_custom_row_bounds, + size_type chunked_read_size); /** - * @brief Allocate nesting information storage for all pages and set pointers - * to it. + * @brief Allocate nesting information storage for all pages and set pointers to it. * * One large contiguous buffer of PageNestingInfo structs is allocated and * distributed among the PageInfo structs. @@ -170,47 +171,43 @@ class reader::impl { hostdevice_vector& page_nesting_info); /** - * @brief Preprocess column information and allocate output buffers. - * - * TODO + * @brief Read a chunk of data and return an output table. * - * There are several pieces of information we can't compute directly from row counts in - * the parquet headers when dealing with nested schemas. - * - The total sizes of all output columns at all nesting levels - * - The starting output buffer offset for each page, for each nesting level + * This function is called internally and expects all preprocessing steps have been done. * - * For flat schemas, these values are computed during header decoding (see gpuDecodePageHeaders) + * @param uses_custom_row_bounds Whether or not num_rows and skip_rows represents user-specific + * bounds + * @return The output table along with columns' metadata + */ + table_with_metadata read_chunk_internal(bool uses_custom_row_bounds); + + /** + * @brief Finalize the output table by adding empty columns for the non-selected columns in + * schema. * - * @param chunks All chunks to be decoded - * @param pages All pages to be decoded - * @param min_rows crop all rows below min_row - * @param total_rows Maximum number of rows to read - * @param uses_custom_row_bounds Whether or not num_rows and min_rows represents user-specific - * bounds - * a preprocess. + * @param out_metadata The output table metadata + * @param out_columns The columns for building the output table + * @return The output table along with columns' metadata */ - void preprocess_columns(hostdevice_vector& chunks, - hostdevice_vector& pages, - size_t min_row, - size_t total_rows, - bool uses_custom_row_bounds, - size_type chunked_read_size); + table_with_metadata finalize_output(table_metadata& out_metadata, + std::vector>& out_columns); /** - * TODO - * @brief allocate_columns - * @param chunks - * @param pages - * @param id - * @param min_row - * @param total_rows - * @param uses_custom_row_bounds + * @brief Allocate data bufers for the output columns. 
+ * + * @param chunks List of column chunk descriptors + * @param pages List of page information + * @param id The chunk intermediate data + * @param skip_rows Crop all rows below skip_rows + * @param num_rows Maximum number of rows to read + * @param uses_custom_row_bounds Whether or not num_rows and skip_rows represents user-specific + * bounds */ void allocate_columns(hostdevice_vector& chunks, hostdevice_vector& pages, gpu::chunk_intermediate_data const& id, - size_t min_row, - size_t total_rows, + size_t skip_rows, + size_t num_rows, bool uses_custom_row_bounds); /** @@ -219,14 +216,14 @@ class reader::impl { * @param chunks List of column chunk descriptors * @param pages List of page information * @param page_nesting Page nesting array - * @param min_row Minimum number of rows from start - * @param total_rows Number of rows to output + * @param skip_rows Minimum number of rows from start + * @param num_rows Number of rows to output */ void decode_page_data(hostdevice_vector& chunks, hostdevice_vector& pages, hostdevice_vector& page_nesting, - size_t min_row, - size_t total_rows); + size_t skip_rows, + size_t num_rows); private: rmm::cuda_stream_view _stream; diff --git a/cpp/src/io/parquet/reader_impl_helpers.cu b/cpp/src/io/parquet/reader_impl_helpers.cu index b8dc10e4559..a9199df6651 100644 --- a/cpp/src/io/parquet/reader_impl_helpers.cu +++ b/cpp/src/io/parquet/reader_impl_helpers.cu @@ -328,19 +328,19 @@ std::vector aggregate_reader_metadata::get_pandas_index_names() con std::tuple> aggregate_reader_metadata::select_row_groups( - std::vector> const& row_groups_list, + std::vector> const& row_group_indices, size_type row_start, size_type row_count) const { std::vector selection; - if (!row_groups_list.empty()) { - CUDF_EXPECTS(row_groups_list.size() == per_file_metadata.size(), + if (!row_group_indices.empty()) { + CUDF_EXPECTS(row_group_indices.size() == per_file_metadata.size(), "Must specify row groups for each source"); row_count = 0; - for (size_t src_idx = 0; src_idx < row_groups_list.size(); ++src_idx) { - for (auto const& rowgroup_idx : row_groups_list[src_idx]) { + for (size_t src_idx = 0; src_idx < row_group_indices.size(); ++src_idx) { + for (auto const& rowgroup_idx : row_group_indices[src_idx]) { CUDF_EXPECTS( rowgroup_idx >= 0 && rowgroup_idx < static_cast(per_file_metadata[src_idx].row_groups.size()), diff --git a/cpp/src/io/parquet/reader_impl_helpers.cuh b/cpp/src/io/parquet/reader_impl_helpers.cuh index 77f8cfc518e..54650046779 100644 --- a/cpp/src/io/parquet/reader_impl_helpers.cuh +++ b/cpp/src/io/parquet/reader_impl_helpers.cuh @@ -164,7 +164,7 @@ class aggregate_reader_metadata { * The input `row_start` and `row_count` parameters will be recomputed and output as the valid * values based on the input row group list. 
* - * @param row_groups Lists of row groups to read, one per source + * @param row_group_indices Lists of row groups to read, one per source * @param row_start Starting row of the selection * @param row_count Total number of rows selected * @@ -172,7 +172,7 @@ class aggregate_reader_metadata { * starting row */ [[nodiscard]] std::tuple> select_row_groups( - std::vector> const& row_groups_list, + std::vector> const& row_group_indices, size_type row_start, size_type row_count) const; diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index 7658e7390ed..8836a705fb3 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -1109,12 +1109,12 @@ struct chunk_row_output_iter { } // anonymous namespace -void reader::impl::preprocess_columns(hostdevice_vector& chunks, - hostdevice_vector& pages, - size_t min_row, - size_t num_rows, - bool uses_custom_row_bounds, - size_type chunked_read_size) +void reader::impl::compute_chunk_read_info(hostdevice_vector& chunks, + hostdevice_vector& pages, + size_t skip_rows, + size_t num_rows, + bool uses_custom_row_bounds, + size_type chunked_read_size) { // iterate over all input columns and determine if they contain lists so we can further // preprocess them. @@ -1258,7 +1258,7 @@ void reader::impl::preprocess_columns(hostdevice_vector& c _chunk_read_info = chunked_read_size > 0 ? compute_splits(pages, _chunk_itm_data, num_rows, chunked_read_size, _stream) - : std::vector{{min_row, num_rows}}; + : std::vector{{skip_rows, num_rows}}; } } // namespace cudf::io::detail::parquet From 6fd3e90152324197ae160ce65079df8e707e92fc Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 27 Oct 2022 16:32:55 -0700 Subject: [PATCH 113/162] Add more tests Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl_preprocess.cu | 4 ++++ cpp/tests/io/parquet_chunked_reader_test.cpp | 14 ++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index 083350960cd..4181636534f 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -910,6 +910,10 @@ std::vector find_splits(std::vector c size_type num_rows, size_t chunked_read_size) { + // for (auto x : sizes) { + // printf("size: %d | %d \n", (int)x.row_count, (int)x.size_bytes); + // } + // now we have an array of {row_count, real output bytes}. just walk through it and generate // splits. // TODO: come up with a clever way to do this entirely in parallel. 
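  // (Illustration, not from this patch: with per-page cumulative {row_count, size_bytes}
  //  entries {20000, 200004}, {40000, 400008}, {60000, 600012}, {80000, 800016},
  //  {100000, 1000020} and a 400'008-byte limit, the walk emits splits after rows 40000
  //  and 80000, i.e. 3 chunks of 2 + 2 + 1 pages, as the list tests expect.)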
For now, as long as batch diff --git a/cpp/tests/io/parquet_chunked_reader_test.cpp b/cpp/tests/io/parquet_chunked_reader_test.cpp index 7c8db2889fe..02eaf0fc57d 100644 --- a/cpp/tests/io/parquet_chunked_reader_test.cpp +++ b/cpp/tests/io/parquet_chunked_reader_test.cpp @@ -380,6 +380,13 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithListsNoNulls) EXPECT_EQ(num_chunks, 3); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } + + // chunk size 2 pages minus one byte: each chunk will be just one page + { + auto const [expected, result, num_chunks] = do_test(400'007, false); + EXPECT_EQ(num_chunks, 5); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } } TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithListsHavingNulls) @@ -442,6 +449,13 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithListsHavingNulls) EXPECT_EQ(num_chunks, 3); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } + + // chunk size 2 pages minus 1 byte: each chunk will be just one page + { + auto const [expected, result, num_chunks] = do_test(285'007, false); + EXPECT_EQ(num_chunks, 5); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } } TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithStructsOfLists) From 9dda980bd5bb5cf71af5ced98b82058ef6a27e85 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 28 Oct 2022 14:01:44 -0700 Subject: [PATCH 114/162] Add test with empty data Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.cu | 4 ++- cpp/tests/io/parquet_chunked_reader_test.cpp | 27 +++++++++++++++++--- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 83f15b2659d..fcd3f3f94f9 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -469,7 +469,9 @@ table_with_metadata reader::impl::read_chunk_internal(bool uses_custom_row_bound auto out_columns = std::vector>{}; out_columns.reserve(_output_buffers.size()); - if (!has_next()) { return finalize_output(out_metadata, out_columns); } + if (!has_next() || _chunk_read_info.size() == 0) { + return finalize_output(out_metadata, out_columns); + } auto const& read_info = _chunk_read_info[_current_read_chunk++]; diff --git a/cpp/tests/io/parquet_chunked_reader_test.cpp b/cpp/tests/io/parquet_chunked_reader_test.cpp index 02eaf0fc57d..6d245d65c27 100644 --- a/cpp/tests/io/parquet_chunked_reader_test.cpp +++ b/cpp/tests/io/parquet_chunked_reader_test.cpp @@ -59,7 +59,6 @@ using strings_col = cudf::test::strings_column_wrapper; using structs_col = cudf::test::structs_column_wrapper; using int32s_lists_col = cudf::test::lists_column_wrapper; -// TODO: Remove the last 2 params auto write_file(std::vector>& input_columns, std::string const& filename, bool nullable, @@ -116,15 +115,18 @@ auto chunked_read(std::string const& filepath, std::size_t byte_limit) auto num_chunks = 0; auto result = std::make_unique(); - while (reader.has_next()) { + do { auto chunk = reader.read_chunk(); if (num_chunks == 0) { result = std::move(chunk.tbl); } else { + CUDF_EXPECTS(chunk.tbl->num_rows() != 0, "Number of rows in the new chunk is zero."); result = cudf::concatenate(std::vector{result->view(), chunk.tbl->view()}); } ++num_chunks; - } + + if (result->num_rows() == 0) { break; } + } while (reader.has_next()); return std::pair(std::move(result), num_chunks); } @@ -134,6 +136,25 @@ auto chunked_read(std::string const& filepath, std::size_t byte_limit) struct ParquetChunkedReaderTest : public cudf::test::BaseFixture { }; 
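// Sketch, not part of the patch: the public-API loop that the chunked_read() helper
// above drives. The chunked_parquet_reader constructor shown here is an assumption
// about the current interface and may differ in detail.
//
//   auto const options = cudf::io::parquet_reader_options::builder(
//                          cudf::io::source_info{filepath}).build();
//   auto reader = cudf::io::chunked_parquet_reader(byte_limit, options);
//   do {
//     auto chunk = reader.read_chunk();   // one table, bounded by ~byte_limit output bytes
//     // ... consume chunk.tbl ...
//   } while (reader.has_next());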
+TEST_F(ParquetChunkedReaderTest, TestChunkedReadNoData) +{ + auto const do_test = []() { + std::vector> input_columns; + input_columns.emplace_back(int32s_col{}.release()); + input_columns.emplace_back(int64s_col{}.release()); + + auto [input_table, filepath] = write_file(input_columns, "chunked_read_empty", false); + auto [result, num_chunks] = chunked_read(filepath, 1'000); + return std::tuple{std::move(input_table), std::move(result), num_chunks}; + }; + + auto const [expected, result, num_chunks] = do_test(); + EXPECT_EQ(num_chunks, 1); + EXPECT_EQ(result->num_rows(), 0); + EXPECT_EQ(result->num_columns(), 2); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); +} + TEST_F(ParquetChunkedReaderTest, TestChunkedReadSimpleData) { auto constexpr num_rows = 40'000; From bb35f9f0f6e3c6fe4244dc9b655af256b77434fa Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 28 Oct 2022 14:04:44 -0700 Subject: [PATCH 115/162] Add tests Signed-off-by: Nghia Truong --- cpp/tests/io/parquet_chunked_reader_test.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/cpp/tests/io/parquet_chunked_reader_test.cpp b/cpp/tests/io/parquet_chunked_reader_test.cpp index 6d245d65c27..0e24cb81503 100644 --- a/cpp/tests/io/parquet_chunked_reader_test.cpp +++ b/cpp/tests/io/parquet_chunked_reader_test.cpp @@ -227,6 +227,13 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadBoundaryCases) CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } + // test with a limit exactly the size of two pages of data minus one byte + { + auto const [expected, result, num_chunks] = do_test(159'999); + EXPECT_EQ(num_chunks, 2); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + // test with a limit exactly the size of two pages of data { auto const [expected, result, num_chunks] = do_test(160'000); From 59166cf6bb8f2c34766125b02e5147bdc99e74d6 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 28 Oct 2022 14:32:35 -0700 Subject: [PATCH 116/162] Rewrite null tests Signed-off-by: Nghia Truong --- cpp/tests/io/parquet_chunked_reader_test.cpp | 52 +++++++++----------- 1 file changed, 23 insertions(+), 29 deletions(-) diff --git a/cpp/tests/io/parquet_chunked_reader_test.cpp b/cpp/tests/io/parquet_chunked_reader_test.cpp index 0e24cb81503..7c11bb5b8b7 100644 --- a/cpp/tests/io/parquet_chunked_reader_test.cpp +++ b/cpp/tests/io/parquet_chunked_reader_test.cpp @@ -68,9 +68,9 @@ auto write_file(std::vector>& input_columns, // Just shift nulls of the next column by one position to avoid having all nulls in the same // table rows. if (nullable) { - // Generate deterministic bitmask instead of random bitmask for easy verification. + // Generate deterministic bitmask instead of random bitmask for easy computation of data size. auto const valid_iter = cudf::detail::make_counting_transform_iterator( - 0, [&](int32_t i) -> bool { return static_cast(i % 2); }); + 0, [](cudf::size_type i) { return i % 4 == 3 ? 
0 : 1; }); cudf::size_type offset{0}; for (auto& col : input_columns) { @@ -362,7 +362,7 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithListsNoNulls) { auto constexpr num_rows = 100'000; - auto const do_test = [num_rows](std::size_t chunk_read_limit, bool nullable) { + auto const do_test = [num_rows](std::size_t chunk_read_limit) { std::vector> input_columns; // 20000 rows in 1 page consist of: // @@ -370,7 +370,7 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithListsNoNulls) // 30000 ints : 120000 bytes // total : 200004 bytes auto const template_lists = int32s_lists_col{ - int32s_lists_col{}, int32s_lists_col{0}, int32s_lists_col{0, 1}, int32s_lists_col{0, 1, 2}}; + int32s_lists_col{}, int32s_lists_col{0}, int32s_lists_col{1, 2}, int32s_lists_col{3, 4, 5}}; auto const gather_iter = cudf::detail::make_counting_transform_iterator(0, [&](int32_t i) { return i % 4; }); @@ -380,7 +380,7 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithListsNoNulls) auto [input_table, filepath] = write_file(input_columns, "chunked_read_with_lists", - nullable, + false /*nullable*/, 512 * 1024, // 512KB per page 20000 // 20k rows per page ); @@ -390,28 +390,28 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithListsNoNulls) // chunk size slightly less than 1 page (forcing it to be at least 1 page per read) { - auto const [expected, result, num_chunks] = do_test(200'000, false); + auto const [expected, result, num_chunks] = do_test(200'000); EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // chunk size exactly 1 page { - auto const [expected, result, num_chunks] = do_test(200'004, false); + auto const [expected, result, num_chunks] = do_test(200'004); EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // chunk size 2 pages. 
3 chunks (2 pages + 2 pages + 1 page) { - auto const [expected, result, num_chunks] = do_test(400'008, false); + auto const [expected, result, num_chunks] = do_test(400'008); EXPECT_EQ(num_chunks, 3); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // chunk size 2 pages minus one byte: each chunk will be just one page { - auto const [expected, result, num_chunks] = do_test(400'007, false); + auto const [expected, result, num_chunks] = do_test(400'007); EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } @@ -421,35 +421,29 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithListsHavingNulls) { auto constexpr num_rows = 100'000; - auto const do_test = [num_rows](std::size_t chunk_read_limit, bool nullable) { + auto const do_test = [num_rows](std::size_t chunk_read_limit) { std::vector> input_columns; // 20000 rows in 1 page consist of: // - // 625 validity words : 2500 bytes + // 625 validity words : 2500 bytes (a null every 4 rows: null at indices [3, 7, 11, ...]) // 20001 offsets : 80004 bytes // 15000 ints : 60000 bytes // total : 142504 bytes - auto const template_lists = int32s_lists_col{// these will all be null - int32s_lists_col{}, - int32s_lists_col{0}, - int32s_lists_col{1, 2}, - int32s_lists_col{3, 4, 5}}; + auto const template_lists = + int32s_lists_col{// these will all be null + int32s_lists_col{}, + int32s_lists_col{0}, + int32s_lists_col{1, 2}, + int32s_lists_col{3, 4, 5, 6, 7, 8, 9} /* this list will be nullified out */}; auto const gather_iter = cudf::detail::make_counting_transform_iterator(0, [&](int32_t i) { return i % 4; }); auto const gather_map = int32s_col(gather_iter, gather_iter + num_rows); - auto intermediate = - std::move(cudf::gather(cudf::table_view{{template_lists}}, gather_map)->release().front()); - auto const valids = cudf::detail::make_counting_transform_iterator( - 0, [](cudf::size_type i) { return i % 4 == 3 ? 0 : 1; }); - intermediate->set_null_mask(cudf::test::detail::make_null_mask(valids, valids + num_rows), - num_rows / 4); - input_columns.emplace_back( - cudf::purge_nonempty_nulls(cudf::lists_column_view{intermediate->view()})); + std::move(cudf::gather(cudf::table_view{{template_lists}}, gather_map)->release().front())); auto [input_table, filepath] = write_file(input_columns, "chunked_read_with_lists_nulls", - nullable, + true /*nullable*/, 512 * 1024, // 512KB per page 20000 // 20k rows per page ); @@ -459,28 +453,28 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithListsHavingNulls) // chunk size slightly less than 1 page (forcing it to be at least 1 page per read) { - auto const [input, result, num_chunks] = do_test(142'500, false); + auto const [input, result, num_chunks] = do_test(142'500); EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*input, *result); } // chunk size exactly 1 page { - auto const [input, result, num_chunks] = do_test(142'504, false); + auto const [input, result, num_chunks] = do_test(142'504); EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*input, *result); } // chunk size 2 pages. 
3 chunks (2 pages + 2 pages + 1 page) { - auto const [expected, result, num_chunks] = do_test(285'008, false); + auto const [expected, result, num_chunks] = do_test(285'008); EXPECT_EQ(num_chunks, 3); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // chunk size 2 pages minus 1 byte: each chunk will be just one page { - auto const [expected, result, num_chunks] = do_test(285'007, false); + auto const [expected, result, num_chunks] = do_test(285'007); EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } From eb6e9963282e1c403344991b77fce6b8d85ebf8e Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 28 Oct 2022 14:41:59 -0700 Subject: [PATCH 117/162] Add more extreme tests Signed-off-by: Nghia Truong --- cpp/tests/io/parquet_chunked_reader_test.cpp | 126 +++++++++++++++++-- 1 file changed, 116 insertions(+), 10 deletions(-) diff --git a/cpp/tests/io/parquet_chunked_reader_test.cpp b/cpp/tests/io/parquet_chunked_reader_test.cpp index 7c11bb5b8b7..ad44b599828 100644 --- a/cpp/tests/io/parquet_chunked_reader_test.cpp +++ b/cpp/tests/io/parquet_chunked_reader_test.cpp @@ -185,7 +185,7 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadSimpleData) TEST_F(ParquetChunkedReaderTest, TestChunkedReadBoundaryCases) { - // tests some specific boundary conditions in the split calculations. + // Tests some specific boundary conditions in the split calculations. auto constexpr num_rows = 40'000; @@ -194,54 +194,76 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadBoundaryCases) auto const value_iter = thrust::make_counting_iterator(0); input_columns.emplace_back(int32s_col(value_iter, value_iter + num_rows).release()); - auto [input_table, filepath] = write_file(input_columns, "chunked_read_simple_boundary", false); - auto [result, num_chunks] = chunked_read(filepath, chunk_read_limit); + auto [input_table, filepath] = + write_file(input_columns, "chunked_read_simple_boundary", false /*nullable*/); + auto [result, num_chunks] = chunked_read(filepath, chunk_read_limit); return std::tuple{std::move(input_table), std::move(result), num_chunks}; }; - // test with a limit slightly less than one page of data + // Test with zero limit: everything will be read in one chunk + { + auto const [expected, result, num_chunks] = do_test(0); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + // Test with a very small limit: 1 byte + { + auto const [expected, result, num_chunks] = do_test(1); + EXPECT_EQ(num_chunks, 2); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + // Test with a very large limit + { + auto const [expected, result, num_chunks] = do_test(2L << 40); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + // Test with a limit slightly less than one page of data { auto const [expected, result, num_chunks] = do_test(79'000); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } - // test with a limit exactly the size one page of data + // Test with a limit exactly the size one page of data { auto const [expected, result, num_chunks] = do_test(80'000); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } - // test with a limit slightly more the size one page of data + // Test with a limit slightly more the size one page of data { auto const [expected, result, num_chunks] = do_test(81'000); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } - // test with a limit slightly less than two pages of data + // Test with a 
limit slightly less than two pages of data { auto const [expected, result, num_chunks] = do_test(159'000); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } - // test with a limit exactly the size of two pages of data minus one byte + // Test with a limit exactly the size of two pages of data minus one byte { auto const [expected, result, num_chunks] = do_test(159'999); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } - // test with a limit exactly the size of two pages of data + // Test with a limit exactly the size of two pages of data { auto const [expected, result, num_chunks] = do_test(160'000); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } - // test with a limit slightly more the size two pages of data + // Test with a limit slightly more the size two pages of data { auto const [expected, result, num_chunks] = do_test(161'000); EXPECT_EQ(num_chunks, 1); @@ -345,6 +367,27 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithStructs) return std::tuple{std::move(input_table), std::move(result), num_chunks}; }; + // Test with zero limit: everything will be read in one chunk + { + auto const [expected, result, num_chunks] = do_test(0, false); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + // Test with a very small limit: 1 byte + { + auto const [expected, result, num_chunks] = do_test(1, false); + EXPECT_EQ(num_chunks, 5); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + // Test with a very large limit + { + auto const [expected, result, num_chunks] = do_test(2L << 40, false); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + { auto const [expected, result, num_chunks] = do_test(500'000, false); EXPECT_EQ(num_chunks, 5); @@ -388,6 +431,27 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithListsNoNulls) return std::tuple{std::move(input_table), std::move(result), num_chunks}; }; + // Test with zero limit: everything will be read in one chunk + { + auto const [expected, result, num_chunks] = do_test(0); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + // Test with a very small limit: 1 byte + { + auto const [expected, result, num_chunks] = do_test(1); + EXPECT_EQ(num_chunks, 5); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + // Test with a very large limit + { + auto const [expected, result, num_chunks] = do_test(2L << 40); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + // chunk size slightly less than 1 page (forcing it to be at least 1 page per read) { auto const [expected, result, num_chunks] = do_test(200'000); @@ -520,6 +584,27 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithStructsOfLists) return std::tuple{std::move(input_table), std::move(result), num_chunks}; }; + // Test with zero limit: everything will be read in one chunk + { + auto const [expected, result, num_chunks] = do_test(0, false); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + // Test with a very small limit: 1 byte + { + auto const [expected, result, num_chunks] = do_test(1, false); + EXPECT_EQ(num_chunks, 10); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + // Test with a very large limit + { + auto const [expected, result, num_chunks] = do_test(2L << 40, false); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + { auto const [expected, result, 
num_chunks] = do_test(500'000, false); EXPECT_EQ(num_chunks, 10); @@ -580,6 +665,27 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithListsOfStructs) return std::tuple{std::move(input_table), std::move(result), num_chunks}; }; + // Test with zero limit: everything will be read in one chunk + { + auto const [expected, result, num_chunks] = do_test(0, false); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + // Test with a very small limit: 1 byte + { + auto const [expected, result, num_chunks] = do_test(1, false); + EXPECT_EQ(num_chunks, 10); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + // Test with a very large limit + { + auto const [expected, result, num_chunks] = do_test(2L << 40, false); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + { auto const [expected, result, num_chunks] = do_test(1'000'000, false); EXPECT_EQ(num_chunks, 7); From 1c567947537b62a45e215758ba25c2a68fc8c5c3 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 28 Oct 2022 15:24:42 -0700 Subject: [PATCH 118/162] Rewrite tests to generate input files just once Signed-off-by: Nghia Truong --- cpp/tests/io/parquet_chunked_reader_test.cpp | 373 ++++++++++++------- 1 file changed, 236 insertions(+), 137 deletions(-) diff --git a/cpp/tests/io/parquet_chunked_reader_test.cpp b/cpp/tests/io/parquet_chunked_reader_test.cpp index ad44b599828..27df448e548 100644 --- a/cpp/tests/io/parquet_chunked_reader_test.cpp +++ b/cpp/tests/io/parquet_chunked_reader_test.cpp @@ -138,17 +138,12 @@ struct ParquetChunkedReaderTest : public cudf::test::BaseFixture { TEST_F(ParquetChunkedReaderTest, TestChunkedReadNoData) { - auto const do_test = []() { - std::vector> input_columns; - input_columns.emplace_back(int32s_col{}.release()); - input_columns.emplace_back(int64s_col{}.release()); - - auto [input_table, filepath] = write_file(input_columns, "chunked_read_empty", false); - auto [result, num_chunks] = chunked_read(filepath, 1'000); - return std::tuple{std::move(input_table), std::move(result), num_chunks}; - }; + std::vector> input_columns; + input_columns.emplace_back(int32s_col{}.release()); + input_columns.emplace_back(int64s_col{}.release()); - auto const [expected, result, num_chunks] = do_test(); + auto const [expected, filepath] = write_file(input_columns, "chunked_read_empty", false); + auto const [result, num_chunks] = chunked_read(filepath, 1'000); EXPECT_EQ(num_chunks, 1); EXPECT_EQ(result->num_rows(), 0); EXPECT_EQ(result->num_columns(), 2); @@ -159,25 +154,25 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadSimpleData) { auto constexpr num_rows = 40'000; - auto const do_test = [num_rows](std::size_t chunk_read_limit, bool nullable) { + auto const generate_input = [num_rows](bool nullable) { std::vector> input_columns; auto const value_iter = thrust::make_counting_iterator(0); input_columns.emplace_back(int32s_col(value_iter, value_iter + num_rows).release()); input_columns.emplace_back(int64s_col(value_iter, value_iter + num_rows).release()); - auto [input_table, filepath] = write_file(input_columns, "chunked_read_simple", nullable); - auto [result, num_chunks] = chunked_read(filepath, chunk_read_limit); - return std::tuple{std::move(input_table), std::move(result), num_chunks}; + return write_file(input_columns, "chunked_read_simple", nullable); }; { - auto const [expected, result, num_chunks] = do_test(240'000, false); + auto const [expected, filepath] = generate_input(false); + auto const [result, num_chunks] = 
chunked_read(filepath, 240'000); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } { - auto const [expected, result, num_chunks] = do_test(240'000, true); + auto const [expected, filepath] = generate_input(true); + auto const [result, num_chunks] = chunked_read(filepath, 240'000); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } @@ -189,83 +184,79 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadBoundaryCases) auto constexpr num_rows = 40'000; - auto const do_test = [](std::size_t chunk_read_limit) { + auto const [expected, filepath] = [num_rows]() { std::vector> input_columns; auto const value_iter = thrust::make_counting_iterator(0); input_columns.emplace_back(int32s_col(value_iter, value_iter + num_rows).release()); - - auto [input_table, filepath] = - write_file(input_columns, "chunked_read_simple_boundary", false /*nullable*/); - auto [result, num_chunks] = chunked_read(filepath, chunk_read_limit); - return std::tuple{std::move(input_table), std::move(result), num_chunks}; - }; + return write_file(input_columns, "chunked_read_simple_boundary", false /*nullable*/); + }(); // Test with zero limit: everything will be read in one chunk { - auto const [expected, result, num_chunks] = do_test(0); + auto const [result, num_chunks] = chunked_read(filepath, 0); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // Test with a very small limit: 1 byte { - auto const [expected, result, num_chunks] = do_test(1); + auto const [result, num_chunks] = chunked_read(filepath, 1); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // Test with a very large limit { - auto const [expected, result, num_chunks] = do_test(2L << 40); + auto const [result, num_chunks] = chunked_read(filepath, 2L << 40); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // Test with a limit slightly less than one page of data { - auto const [expected, result, num_chunks] = do_test(79'000); + auto const [result, num_chunks] = chunked_read(filepath, 79'000); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // Test with a limit exactly the size one page of data { - auto const [expected, result, num_chunks] = do_test(80'000); + auto const [result, num_chunks] = chunked_read(filepath, 80'000); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // Test with a limit slightly more the size one page of data { - auto const [expected, result, num_chunks] = do_test(81'000); + auto const [result, num_chunks] = chunked_read(filepath, 81'000); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // Test with a limit slightly less than two pages of data { - auto const [expected, result, num_chunks] = do_test(159'000); + auto const [result, num_chunks] = chunked_read(filepath, 159'000); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // Test with a limit exactly the size of two pages of data minus one byte { - auto const [expected, result, num_chunks] = do_test(159'999); + auto const [result, num_chunks] = chunked_read(filepath, 159'999); EXPECT_EQ(num_chunks, 2); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // Test with a limit exactly the size of two pages of data { - auto const [expected, result, num_chunks] = do_test(160'000); + auto const [result, num_chunks] = chunked_read(filepath, 160'000); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // 
Test with a limit slightly more the size two pages of data { - auto const [expected, result, num_chunks] = do_test(161'000); + auto const [result, num_chunks] = chunked_read(filepath, 161'000); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } @@ -275,7 +266,7 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithString) { auto constexpr num_rows = 60'000; - auto const do_test = [num_rows](std::size_t chunk_read_limit, bool nullable) { + auto const generate_input = [num_rows](bool nullable) { std::vector> input_columns; auto const value_iter = thrust::make_counting_iterator(0); @@ -297,13 +288,6 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithString) }); input_columns.emplace_back(strings_col(str_iter, str_iter + num_rows).release()); - auto [input_table, filepath] = write_file(input_columns, - "chunked_read_with_strings", - nullable, - 512 * 1024, // 512KB per page - 20000 // 20k rows per page - ); - // Cumulative sizes: // A0 + B0 : 180004 // A1 + B1 : 420008 @@ -311,30 +295,75 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithString) // skip_rows / num_rows // byte_limit==500000 should give 2 chunks: {0, 40000}, {40000, 20000} // byte_limit==1000000 should give 1 chunks: {0, 60000}, - auto [result, num_chunks] = chunked_read(filepath, chunk_read_limit); - return std::tuple{std::move(input_table), std::move(result), num_chunks}; + return write_file(input_columns, + "chunked_read_with_strings", + nullable, + 512 * 1024, // 512KB per page + 20000 // 20k rows per page + ); }; + auto const [expected_no_null, filepath_no_null] = generate_input(false); + auto const [expected_with_nulls, filepath_with_nulls] = generate_input(true); + + // Test with zero limit: everything will be read in one chunk + { + auto const [result, num_chunks] = chunked_read(filepath_no_null, 0); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 0); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); + } + + // Test with a very small limit: 1 byte + { + auto const [result, num_chunks] = chunked_read(filepath_no_null, 1); + EXPECT_EQ(num_chunks, 3); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1); + EXPECT_EQ(num_chunks, 3); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); + } + + // Test with a very large limit { - auto const [expected, result, num_chunks] = do_test(500'000, false); + auto const [result, num_chunks] = chunked_read(filepath_no_null, 2L << 40); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 2L << 40); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); + } + + // Other tests: + + { + auto const [result, num_chunks] = chunked_read(filepath_no_null, 500'000); EXPECT_EQ(num_chunks, 2); - CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [expected, result, num_chunks] = do_test(500'000, true); + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 500'000); EXPECT_EQ(num_chunks, 2); - CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } { - auto const [expected, result, num_chunks] = 
do_test(1'000'000, false); + auto const [result, num_chunks] = chunked_read(filepath_no_null, 1'000'000); EXPECT_EQ(num_chunks, 1); - CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [expected, result, num_chunks] = do_test(1'000'000, true); + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1'000'000); EXPECT_EQ(num_chunks, 1); - CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } } @@ -342,7 +371,7 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithStructs) { auto constexpr num_rows = 100'000; - auto const do_test = [num_rows](std::size_t chunk_read_limit, bool nullable) { + auto const generate_input = [num_rows](bool nullable) { std::vector> input_columns; auto const int_iter = thrust::make_counting_iterator(0); input_columns.emplace_back(int32s_col(int_iter, int_iter + num_rows).release()); @@ -357,47 +386,64 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithStructs) return structs_col{{child1, child2, child3}}.release(); }()); - auto [input_table, filepath] = write_file(input_columns, - "chunked_read_with_structs", - nullable, - 512 * 1024, // 512KB per page - 20000 // 20k rows per page + return write_file(input_columns, + "chunked_read_with_structs", + nullable, + 512 * 1024, // 512KB per page + 20000 // 20k rows per page ); - auto [result, num_chunks] = chunked_read(filepath, chunk_read_limit); - return std::tuple{std::move(input_table), std::move(result), num_chunks}; }; + auto const [expected_no_null, filepath_no_null] = generate_input(false); + auto const [expected_with_nulls, filepath_with_nulls] = generate_input(true); + // Test with zero limit: everything will be read in one chunk { - auto const [expected, result, num_chunks] = do_test(0, false); + auto const [result, num_chunks] = chunked_read(filepath_no_null, 0); EXPECT_EQ(num_chunks, 1); - CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 0); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } // Test with a very small limit: 1 byte { - auto const [expected, result, num_chunks] = do_test(1, false); + auto const [result, num_chunks] = chunked_read(filepath_no_null, 1); EXPECT_EQ(num_chunks, 5); - CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1); + EXPECT_EQ(num_chunks, 5); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } // Test with a very large limit { - auto const [expected, result, num_chunks] = do_test(2L << 40, false); + auto const [result, num_chunks] = chunked_read(filepath_no_null, 2L << 40); EXPECT_EQ(num_chunks, 1); - CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 2L << 40); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); + } + + // Other tests: { - auto const [expected, result, num_chunks] = do_test(500'000, false); + auto const [result, num_chunks] = chunked_read(filepath_no_null, 500'000); EXPECT_EQ(num_chunks, 5); - CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, 
*result); } - { - auto const [expected, result, num_chunks] = do_test(500'000, true); + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 500'000); EXPECT_EQ(num_chunks, 5); - CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } } @@ -405,7 +451,7 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithListsNoNulls) { auto constexpr num_rows = 100'000; - auto const do_test = [num_rows](std::size_t chunk_read_limit) { + auto const [expected, filepath] = [num_rows]() { std::vector> input_columns; // 20000 rows in 1 page consist of: // @@ -421,61 +467,59 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithListsNoNulls) input_columns.emplace_back( std::move(cudf::gather(cudf::table_view{{template_lists}}, gather_map)->release().front())); - auto [input_table, filepath] = write_file(input_columns, - "chunked_read_with_lists", - false /*nullable*/, - 512 * 1024, // 512KB per page - 20000 // 20k rows per page + return write_file(input_columns, + "chunked_read_with_lists_no_null", + false /*nullable*/, + 512 * 1024, // 512KB per page + 20000 // 20k rows per page ); - auto [result, num_chunks] = chunked_read(filepath, chunk_read_limit); - return std::tuple{std::move(input_table), std::move(result), num_chunks}; - }; + }(); // Test with zero limit: everything will be read in one chunk { - auto const [expected, result, num_chunks] = do_test(0); + auto const [result, num_chunks] = chunked_read(filepath, 0); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // Test with a very small limit: 1 byte { - auto const [expected, result, num_chunks] = do_test(1); + auto const [result, num_chunks] = chunked_read(filepath, 1); EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // Test with a very large limit { - auto const [expected, result, num_chunks] = do_test(2L << 40); + auto const [result, num_chunks] = chunked_read(filepath, 2L << 40); EXPECT_EQ(num_chunks, 1); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // chunk size slightly less than 1 page (forcing it to be at least 1 page per read) { - auto const [expected, result, num_chunks] = do_test(200'000); + auto const [result, num_chunks] = chunked_read(filepath, 200'000); EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // chunk size exactly 1 page { - auto const [expected, result, num_chunks] = do_test(200'004); + auto const [result, num_chunks] = chunked_read(filepath, 200'004); EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // chunk size 2 pages. 
3 chunks (2 pages + 2 pages + 1 page) { - auto const [expected, result, num_chunks] = do_test(400'008); + auto const [result, num_chunks] = chunked_read(filepath, 400'008); EXPECT_EQ(num_chunks, 3); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // chunk size 2 pages minus one byte: each chunk will be just one page { - auto const [expected, result, num_chunks] = do_test(400'007); + auto const [result, num_chunks] = chunked_read(filepath, 400'007); EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } @@ -485,7 +529,7 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithListsHavingNulls) { auto constexpr num_rows = 100'000; - auto const do_test = [num_rows](std::size_t chunk_read_limit) { + auto const [expected, filepath] = [num_rows]() { std::vector> input_columns; // 20000 rows in 1 page consist of: // @@ -505,40 +549,59 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithListsHavingNulls) input_columns.emplace_back( std::move(cudf::gather(cudf::table_view{{template_lists}}, gather_map)->release().front())); - auto [input_table, filepath] = write_file(input_columns, - "chunked_read_with_lists_nulls", - true /*nullable*/, - 512 * 1024, // 512KB per page - 20000 // 20k rows per page + return write_file(input_columns, + "chunked_read_with_lists_nulls", + true /*nullable*/, + 512 * 1024, // 512KB per page + 20000 // 20k rows per page ); - auto [result, num_chunks] = chunked_read(filepath, chunk_read_limit); - return std::tuple{std::move(input_table), std::move(result), num_chunks}; - }; + }(); + + // Test with zero limit: everything will be read in one chunk + { + auto const [result, num_chunks] = chunked_read(filepath, 0); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + // Test with a very small limit: 1 byte + { + auto const [result, num_chunks] = chunked_read(filepath, 1); + EXPECT_EQ(num_chunks, 5); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } + + // Test with a very large limit + { + auto const [result, num_chunks] = chunked_read(filepath, 2L << 40); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + } // chunk size slightly less than 1 page (forcing it to be at least 1 page per read) { - auto const [input, result, num_chunks] = do_test(142'500); + auto const [result, num_chunks] = chunked_read(filepath, 142'500); EXPECT_EQ(num_chunks, 5); - CUDF_TEST_EXPECT_TABLES_EQUAL(*input, *result); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // chunk size exactly 1 page { - auto const [input, result, num_chunks] = do_test(142'504); + auto const [result, num_chunks] = chunked_read(filepath, 142'504); EXPECT_EQ(num_chunks, 5); - CUDF_TEST_EXPECT_TABLES_EQUAL(*input, *result); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // chunk size 2 pages. 
3 chunks (2 pages + 2 pages + 1 page) { - auto const [expected, result, num_chunks] = do_test(285'008); + auto const [result, num_chunks] = chunked_read(filepath, 285'008); EXPECT_EQ(num_chunks, 3); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } // chunk size 2 pages minus 1 byte: each chunk will be just one page { - auto const [expected, result, num_chunks] = do_test(285'007); + auto const [result, num_chunks] = chunked_read(filepath, 285'007); EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); } @@ -548,7 +611,7 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithStructsOfLists) { auto constexpr num_rows = 100'000; - auto const do_test = [num_rows](std::size_t chunk_read_limit, bool nullable) { + auto const generate_input = [num_rows](bool nullable) { std::vector> input_columns; auto const int_iter = thrust::make_counting_iterator(0); input_columns.emplace_back(int32s_col(int_iter, int_iter + num_rows).release()); @@ -574,47 +637,65 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithStructsOfLists) return structs_col(std::move(child_columns)).release(); }()); - auto [input_table, filepath] = write_file(input_columns, - "chunked_read_with_structs_of_lists", - nullable, - 512 * 1024, // 512KB per page - 20000 // 20k rows per page + return write_file(input_columns, + "chunked_read_with_structs_of_lists", + nullable, + 512 * 1024, // 512KB per page + 20000 // 20k rows per page ); - auto [result, num_chunks] = chunked_read(filepath, chunk_read_limit); - return std::tuple{std::move(input_table), std::move(result), num_chunks}; }; + auto const [expected_no_null, filepath_no_null] = generate_input(false); + auto const [expected_with_nulls, filepath_with_nulls] = generate_input(true); + // Test with zero limit: everything will be read in one chunk { - auto const [expected, result, num_chunks] = do_test(0, false); + auto const [result, num_chunks] = chunked_read(filepath_no_null, 0); EXPECT_EQ(num_chunks, 1); - CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 0); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } // Test with a very small limit: 1 byte { - auto const [expected, result, num_chunks] = do_test(1, false); + auto const [result, num_chunks] = chunked_read(filepath_no_null, 1); EXPECT_EQ(num_chunks, 10); - CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1); + EXPECT_EQ(num_chunks, 5); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } // Test with a very large limit { - auto const [expected, result, num_chunks] = do_test(2L << 40, false); + auto const [result, num_chunks] = chunked_read(filepath_no_null, 2L << 40); EXPECT_EQ(num_chunks, 1); - CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 2L << 40); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } + // Other tests: + { - auto const [expected, result, num_chunks] = do_test(500'000, false); + auto const [result, num_chunks] = chunked_read(filepath_no_null, 500'000); EXPECT_EQ(num_chunks, 10); - CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + 
CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [expected, result, num_chunks] = do_test(500'000, true); + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 500'000); EXPECT_EQ(num_chunks, 5); - CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } } @@ -622,7 +703,7 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithListsOfStructs) { auto constexpr num_rows = 100'000; - auto const do_test = [num_rows](std::size_t chunk_read_limit, bool nullable) { + auto const generate_input = [num_rows](bool nullable) { std::vector> input_columns; auto const int_iter = thrust::make_counting_iterator(0); input_columns.emplace_back(int32s_col(int_iter, int_iter + num_rows).release()); @@ -655,46 +736,64 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithListsOfStructs) 0, rmm::device_buffer{})); - auto [input_table, filepath] = write_file(input_columns, - "chunked_read_with_lists_of_structs", - nullable, - 512 * 1024, // 512KB per page - 20000 // 20k rows per page + return write_file(input_columns, + "chunked_read_with_lists_of_structs", + nullable, + 512 * 1024, // 512KB per page + 20000 // 20k rows per page ); - auto [result, num_chunks] = chunked_read(filepath, chunk_read_limit); - return std::tuple{std::move(input_table), std::move(result), num_chunks}; }; + auto const [expected_no_null, filepath_no_null] = generate_input(false); + auto const [expected_with_nulls, filepath_with_nulls] = generate_input(true); + // Test with zero limit: everything will be read in one chunk { - auto const [expected, result, num_chunks] = do_test(0, false); + auto const [result, num_chunks] = chunked_read(filepath_no_null, 0); EXPECT_EQ(num_chunks, 1); - CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 0); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } // Test with a very small limit: 1 byte { - auto const [expected, result, num_chunks] = do_test(1, false); + auto const [result, num_chunks] = chunked_read(filepath_no_null, 1); EXPECT_EQ(num_chunks, 10); - CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1); + EXPECT_EQ(num_chunks, 5); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } // Test with a very large limit { - auto const [expected, result, num_chunks] = do_test(2L << 40, false); + auto const [result, num_chunks] = chunked_read(filepath_no_null, 2L << 40); EXPECT_EQ(num_chunks, 1); - CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 2L << 40); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } + // Other tests: + { - auto const [expected, result, num_chunks] = do_test(1'000'000, false); + auto const [result, num_chunks] = chunked_read(filepath_no_null, 1'000'000); EXPECT_EQ(num_chunks, 7); - CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [expected, result, num_chunks] = do_test(1'000'000, true); + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1'000'000); EXPECT_EQ(num_chunks, 5); 
- CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } } From 4777419ef389888de0c9187e47f37689e8eb8527 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 28 Oct 2022 15:51:27 -0700 Subject: [PATCH 119/162] Fix tests with structs of lists Signed-off-by: Nghia Truong --- cpp/tests/io/parquet_chunked_reader_test.cpp | 32 ++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/cpp/tests/io/parquet_chunked_reader_test.cpp b/cpp/tests/io/parquet_chunked_reader_test.cpp index 27df448e548..06a798ea90b 100644 --- a/cpp/tests/io/parquet_chunked_reader_test.cpp +++ b/cpp/tests/io/parquet_chunked_reader_test.cpp @@ -80,18 +80,50 @@ auto write_file(std::vector>& input_columns, if (col->type().id() == cudf::type_id::STRUCT) { auto const null_mask = col->view().null_mask(); auto const null_count = col->null_count(); + bool has_list = false; + for (cudf::size_type idx = 0; idx < col->num_children(); ++idx) { cudf::structs::detail::superimpose_parent_nulls(null_mask, null_count, col->child(idx), cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + + if (col->child(idx).type().id() == cudf::type_id::LIST) { has_list = true; } + } + + // If there is lists column in this struct column, rebuild it. + if (has_list) { + auto const dtype = col->type(); + auto const size = col->size(); + auto const null_count = col->null_count(); + auto col_content = col->release(); + + std::vector> children; + for (std::size_t idx = 0; idx < col_content.children.size(); ++idx) { + auto& child = col_content.children[idx]; + + if (child->type().id() == cudf::type_id::LIST) { + children.emplace_back( + cudf::purge_nonempty_nulls(cudf::lists_column_view{child->view()})); + } else { + children.emplace_back(std::move(child)); + } + } + + col = std::make_unique(dtype, + size, + std::move(*col_content.data), + std::move(*col_content.null_mask), + null_count, + std::move(children)); } } else if (col->type().id() == cudf::type_id::LIST) { col = cudf::purge_nonempty_nulls(cudf::lists_column_view{col->view()}); } } } + auto input_table = std::make_unique(std::move(input_columns)); auto filepath = temp_env->get_temp_filepath(nullable ? 
filename + "_nullable.parquet" : filename + ".parquet"); From aedc37ae6ba2d74efddd0442e4089aa025e31683 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 28 Oct 2022 16:15:11 -0700 Subject: [PATCH 120/162] Handle nulls for more complex types Signed-off-by: Nghia Truong --- cpp/tests/io/parquet_chunked_reader_test.cpp | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/cpp/tests/io/parquet_chunked_reader_test.cpp b/cpp/tests/io/parquet_chunked_reader_test.cpp index 06a798ea90b..b1968c72623 100644 --- a/cpp/tests/io/parquet_chunked_reader_test.cpp +++ b/cpp/tests/io/parquet_chunked_reader_test.cpp @@ -80,7 +80,7 @@ auto write_file(std::vector>& input_columns, if (col->type().id() == cudf::type_id::STRUCT) { auto const null_mask = col->view().null_mask(); auto const null_count = col->null_count(); - bool has_list = false; + bool is_complex_type = false; for (cudf::size_type idx = 0; idx < col->num_children(); ++idx) { cudf::structs::detail::superimpose_parent_nulls(null_mask, @@ -89,11 +89,17 @@ auto write_file(std::vector>& input_columns, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); - if (col->child(idx).type().id() == cudf::type_id::LIST) { has_list = true; } + if (auto const child_typeid = col->child(idx).type().id(); + child_typeid == cudf::type_id::LIST || child_typeid == cudf::type_id::STRUCT || + child_typeid == cudf::type_id::STRING) { + is_complex_type = true; + } } // If there is lists column in this struct column, rebuild it. - if (has_list) { + // Note that this does not recursively rebuild the column thus will not work correctly with + // nested types having many nested levels. + if (is_complex_type) { auto const dtype = col->type(); auto const size = col->size(); auto const null_count = col->null_count(); @@ -106,6 +112,12 @@ auto write_file(std::vector>& input_columns, if (child->type().id() == cudf::type_id::LIST) { children.emplace_back( cudf::purge_nonempty_nulls(cudf::lists_column_view{child->view()})); + } else if (child->type().id() == cudf::type_id::STRUCT) { + children.emplace_back( + cudf::purge_nonempty_nulls(cudf::structs_column_view{child->view()})); + } else if (child->type().id() == cudf::type_id::STRING) { + children.emplace_back( + cudf::purge_nonempty_nulls(cudf::strings_column_view{child->view()})); } else { children.emplace_back(std::move(child)); } From c00eb3cec24c5351372c4f528dc0041678e82eef Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 28 Oct 2022 16:17:38 -0700 Subject: [PATCH 121/162] Fix another nulls handling bug for strings Signed-off-by: Nghia Truong --- cpp/tests/io/parquet_chunked_reader_test.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/tests/io/parquet_chunked_reader_test.cpp b/cpp/tests/io/parquet_chunked_reader_test.cpp index b1968c72623..6653b00bf30 100644 --- a/cpp/tests/io/parquet_chunked_reader_test.cpp +++ b/cpp/tests/io/parquet_chunked_reader_test.cpp @@ -132,6 +132,8 @@ auto write_file(std::vector>& input_columns, } } else if (col->type().id() == cudf::type_id::LIST) { col = cudf::purge_nonempty_nulls(cudf::lists_column_view{col->view()}); + } else if (col->type().id() == cudf::type_id::STRING) { + col = cudf::purge_nonempty_nulls(cudf::strings_column_view{col->view()}); } } } From 4d24f88d456c1d234014c35956e959eb4c54a868 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 28 Oct 2022 16:27:02 -0700 Subject: [PATCH 122/162] Simplify the null purging process Signed-off-by: Nghia Truong --- cpp/tests/io/parquet_chunked_reader_test.cpp | 52 
+++----------------- 1 file changed, 8 insertions(+), 44 deletions(-) diff --git a/cpp/tests/io/parquet_chunked_reader_test.cpp b/cpp/tests/io/parquet_chunked_reader_test.cpp index 6653b00bf30..034b1281606 100644 --- a/cpp/tests/io/parquet_chunked_reader_test.cpp +++ b/cpp/tests/io/parquet_chunked_reader_test.cpp @@ -74,13 +74,13 @@ auto write_file(std::vector>& input_columns, cudf::size_type offset{0}; for (auto& col : input_columns) { + auto const col_typeid = col->type().id(); col->set_null_mask( cudf::test::detail::make_null_mask(valid_iter + offset, valid_iter + col->size() + offset)); - if (col->type().id() == cudf::type_id::STRUCT) { + if (col_typeid == cudf::type_id::STRUCT) { auto const null_mask = col->view().null_mask(); auto const null_count = col->null_count(); - bool is_complex_type = false; for (cudf::size_type idx = 0; idx < col->num_children(); ++idx) { cudf::structs::detail::superimpose_parent_nulls(null_mask, @@ -88,51 +88,15 @@ auto write_file(std::vector>& input_columns, col->child(idx), cudf::get_default_stream(), rmm::mr::get_current_device_resource()); - - if (auto const child_typeid = col->child(idx).type().id(); - child_typeid == cudf::type_id::LIST || child_typeid == cudf::type_id::STRUCT || - child_typeid == cudf::type_id::STRING) { - is_complex_type = true; - } } + } - // If there is lists column in this struct column, rebuild it. - // Note that this does not recursively rebuild the column thus will not work correctly with - // nested types having many nested levels. - if (is_complex_type) { - auto const dtype = col->type(); - auto const size = col->size(); - auto const null_count = col->null_count(); - auto col_content = col->release(); - - std::vector> children; - for (std::size_t idx = 0; idx < col_content.children.size(); ++idx) { - auto& child = col_content.children[idx]; - - if (child->type().id() == cudf::type_id::LIST) { - children.emplace_back( - cudf::purge_nonempty_nulls(cudf::lists_column_view{child->view()})); - } else if (child->type().id() == cudf::type_id::STRUCT) { - children.emplace_back( - cudf::purge_nonempty_nulls(cudf::structs_column_view{child->view()})); - } else if (child->type().id() == cudf::type_id::STRING) { - children.emplace_back( - cudf::purge_nonempty_nulls(cudf::strings_column_view{child->view()})); - } else { - children.emplace_back(std::move(child)); - } - } - - col = std::make_unique(dtype, - size, - std::move(*col_content.data), - std::move(*col_content.null_mask), - null_count, - std::move(children)); - } - } else if (col->type().id() == cudf::type_id::LIST) { + // Can't use `cudf::detail::purge_nonempty_nulls` since it requires to be compiled with CUDA. + if (col_typeid == cudf::type_id::LIST) { col = cudf::purge_nonempty_nulls(cudf::lists_column_view{col->view()}); - } else if (col->type().id() == cudf::type_id::STRING) { + } else if (col_typeid == cudf::type_id::STRUCT) { + col = cudf::purge_nonempty_nulls(cudf::structs_column_view{col->view()}); + } else if (col_typeid == cudf::type_id::STRING) { col = cudf::purge_nonempty_nulls(cudf::strings_column_view{col->view()}); } } From b15bb39f692d71ec54a68ac18aebdfab55190920 Mon Sep 17 00:00:00 2001 From: Dave Baranec Date: Mon, 31 Oct 2022 10:04:09 -0500 Subject: [PATCH 123/162] Cleanup. 
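
This is largely a code move: allocate_columns() and its helper iterators migrate from reader_impl.cu into reader_impl_preprocess.cu next to the chunk-split computation, and the debug printing is reworked into print_cumulative_page_info() / print_cumulative_row_info(). For context, the splitting pass that lives in this file walks pages in row order and closes a chunk whenever the accumulated output size would exceed the requested byte limit. The following is a minimal host-side sketch of that idea only; it is not the device-side implementation, and it assumes simple per-page {rows, bytes} pairs rather than the cumulative_row_info array the real code operates on:

    #include <cstddef>
    #include <utility>
    #include <vector>

    // Hypothetical, simplified splitting scan. Each element of `pages` is
    // {rows_in_page, output_bytes_for_page}, listed in row order.
    struct chunk_info { std::size_t skip_rows; std::size_t num_rows; };

    inline std::vector<chunk_info> sketch_find_splits(
      std::vector<std::pair<std::size_t, std::size_t>> const& pages, std::size_t byte_limit)
    {
      std::vector<chunk_info> splits;
      std::size_t chunk_start = 0;  // first row of the chunk being built
      std::size_t chunk_bytes = 0;  // output bytes accumulated in that chunk
      std::size_t row_count   = 0;  // total rows seen so far
      for (auto const& [rows, bytes] : pages) {
        // close the current chunk before a page that would push it past the limit,
        // but never emit an empty chunk (a single page may exceed the limit by itself)
        if (chunk_bytes + bytes > byte_limit && row_count > chunk_start) {
          splits.push_back({chunk_start, row_count - chunk_start});
          chunk_start = row_count;
          chunk_bytes = 0;
        }
        row_count += rows;
        chunk_bytes += bytes;
      }
      splits.push_back({chunk_start, row_count - chunk_start});  // final chunk
      return splits;
    }

Note that the real reader treats a limit of zero as unlimited (see the "zero limit" tests above); this sketch deliberately does not model that special case.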
--- cpp/src/io/parquet/reader_impl.cu | 165 ---------- cpp/src/io/parquet/reader_impl_preprocess.cu | 325 ++++++++++++++----- 2 files changed, 251 insertions(+), 239 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 83f15b2659d..6442e37279b 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -32,65 +32,6 @@ namespace cudf::io::detail::parquet { namespace { -struct get_page_nesting_size { - size_type const src_col_schema; - size_type const depth; - gpu::PageInfo const* const pages; - - __device__ size_type operator()(int index) const - { - auto const& page = pages[index]; - if (page.src_col_schema != src_col_schema || page.flags & gpu::PAGEINFO_FLAGS_DICTIONARY) { - return 0; - } - return page.nesting[depth].size; - } -}; - -struct start_offset_output_iterator { - gpu::PageInfo* pages; - int const* page_indices; - int cur_index; - int src_col_schema; - int nesting_depth; - int empty = 0; - using value_type = size_type; - using difference_type = size_type; - using pointer = size_type*; - using reference = size_type&; - using iterator_category = thrust::output_device_iterator_tag; - - __host__ __device__ void operator=(start_offset_output_iterator const& other) - { - pages = other.pages; - page_indices = other.page_indices; - cur_index = other.cur_index; - src_col_schema = other.src_col_schema; - nesting_depth = other.nesting_depth; - } - - __host__ __device__ start_offset_output_iterator operator+(int i) - { - return start_offset_output_iterator{ - pages, page_indices, cur_index + i, src_col_schema, nesting_depth}; - } - - __host__ __device__ void operator++() { cur_index++; } - - __device__ reference operator[](int i) { return dereference(cur_index + i); } - __device__ reference operator*() { return dereference(cur_index); } - - private: - __device__ reference dereference(int index) - { - gpu::PageInfo const& p = pages[page_indices[index]]; - if (p.src_col_schema != src_col_schema || p.flags & gpu::PAGEINFO_FLAGS_DICTIONARY) { - return empty; - } - return p.nesting[nesting_depth].page_start_value; - } -}; - /** * @brief Recursively copy the output buffer from one to another. * @@ -111,112 +52,6 @@ void copy_output_buffer(column_buffer const& buff, column_buffer& new_buff) } // namespace -void reader::impl::allocate_columns(hostdevice_vector& chunks, - hostdevice_vector& pages, - gpu::chunk_intermediate_data const& id, - size_t skip_rows, - size_t num_rows, - bool uses_custom_row_bounds) -{ - // computes: - // PageNestingInfo::size for each level of nesting, for each page, taking row bounds into account. - // PageInfo::skipped_values, which tells us where to start decoding in the input to respect the - // user bounds. - // It is only necessary to do this second pass if uses_custom_row_bounds is set (if the user has - // specified artifical bounds). - if (uses_custom_row_bounds) { - gpu::ComputePageSizes(pages, - chunks, - skip_rows, - num_rows, - false, // num_rows is already computed - false, // no need to compute string sizes - _stream); - // print_pages(pages, _stream); - } - - // iterate over all input columns and allocate any associated output - // buffers if they are not part of a list hierarchy. mark down - // if we have any list columns that need further processing. 
- bool has_lists = false; - for (size_t idx = 0; idx < _input_columns.size(); idx++) { - auto const& input_col = _input_columns[idx]; - size_t const max_depth = input_col.nesting_depth(); - - auto* cols = &_output_buffers; - for (size_t l_idx = 0; l_idx < max_depth; l_idx++) { - auto& out_buf = (*cols)[input_col.nesting[l_idx]]; - cols = &out_buf.children; - - // if this has a list parent, we will have to do further work in gpu::PreprocessColumnData - // to know how big this buffer actually is. - if (out_buf.user_data & PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT) { - has_lists = true; - } - // if we haven't already processed this column because it is part of a struct hierarchy - else if (out_buf.size == 0) { - // add 1 for the offset if this is a list column - out_buf.create( - out_buf.type.id() == type_id::LIST && l_idx < max_depth ? num_rows + 1 : num_rows, - _stream, - _mr); - } - } - } - - // compute output column sizes by examining the pages of the -input- columns - if (has_lists) { - auto& page_keys = _chunk_itm_data.page_keys; - auto& page_index = _chunk_itm_data.page_index; - for (size_t idx = 0; idx < _input_columns.size(); idx++) { - auto const& input_col = _input_columns[idx]; - auto src_col_schema = input_col.schema_idx; - size_t max_depth = input_col.nesting_depth(); - - auto* cols = &_output_buffers; - for (size_t l_idx = 0; l_idx < input_col.nesting_depth(); l_idx++) { - auto& out_buf = (*cols)[input_col.nesting[l_idx]]; - cols = &out_buf.children; - - // size iterator. indexes pages by sorted order - auto size_input = thrust::make_transform_iterator( - page_index.begin(), - get_page_nesting_size{src_col_schema, static_cast(l_idx), pages.device_ptr()}); - - // if this buffer is part of a list hierarchy, we need to determine it's - // final size and allocate it here. - // - // for struct columns, higher levels of the output columns are shared between input - // columns. so don't compute any given level more than once. 
- if ((out_buf.user_data & PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT) && out_buf.size == 0) { - int size = - thrust::reduce(rmm::exec_policy(_stream), size_input, size_input + pages.size()); - - // if this is a list column add 1 for non-leaf levels for the terminating offset - if (out_buf.type.id() == type_id::LIST && l_idx < max_depth) { size++; } - - // allocate - out_buf.create(size, _stream, _mr); - } - - // for nested hierarchies, compute per-page start offset - if (input_col.has_repetition) { - thrust::exclusive_scan_by_key( - rmm::exec_policy(_stream), - page_keys.begin(), - page_keys.end(), - size_input, - start_offset_output_iterator{pages.device_ptr(), - page_index.begin(), - 0, - static_cast(src_col_schema), - static_cast(l_idx)}); - } - } - } - } -} - void reader::impl::decode_page_data(hostdevice_vector& chunks, hostdevice_vector& pages, hostdevice_vector& page_nesting, diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index 4181636534f..52c939178ad 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -40,6 +40,7 @@ #include #include #include +#include namespace cudf::io::detail::parquet { @@ -771,6 +772,12 @@ void reader::impl::load_and_decompress_data(std::vector const& r namespace { +struct cumulative_row_info { + size_t row_count; // cumulative row count + size_t size_bytes; // cumulative size in bytes + int key; // schema index +}; + #if defined(PREPROCESS_DEBUG) void print_pages(hostdevice_vector& pages, rmm::cuda_stream_view _stream) { @@ -790,26 +797,72 @@ void print_pages(hostdevice_vector& pages, rmm::cuda_stream_view } } -void print_chunks(hostdevice_vector& chunks, rmm::cuda_stream_view _stream) +void print_cumulative_page_info(hostdevice_vector& pages, + rmm::device_uvector const& page_index, + rmm::device_uvector const& c_info, + rmm::cuda_stream_view stream) +{ + pages.device_to_host(stream, true); + + printf("------------\nCumulative sizes by page\n"); + + std::vector schemas(pages.size()); + std::vector h_page_index(pages.size()); + cudaMemcpy(h_page_index.data(), page_index.data(), sizeof(int) * pages.size(), cudaMemcpyDeviceToHost); + std::vector h_cinfo(pages.size()); + cudaMemcpy(h_cinfo.data(), c_info.data(), sizeof(cumulative_row_info) * pages.size(), cudaMemcpyDeviceToHost); + auto schema_iter = cudf::detail::make_counting_transform_iterator(0, [&](size_type i){ + return pages[h_page_index[i]].src_col_schema; + }); + thrust::copy(thrust::seq, schema_iter, schema_iter + pages.size(), schemas.begin()); + auto last = thrust::unique(thrust::seq, schemas.begin(), schemas.end()); + schemas.resize(last - schemas.begin()); + printf("Num schemas: %lu\n", schemas.size()); + + for(size_t idx=0; idx const& sizes, std::string const& label, std::optional> splits = std::nullopt) { - chunks.device_to_host(_stream, true); - for (size_t idx = 0; idx < chunks.size(); idx++) { - auto const& c = chunks[idx]; - printf("C(%lu, s:%d): num_values(%lu), start_row(%lu), num_rows(%u)\n", - idx, - c.src_col_schema, - c.num_values, - c.start_row, - c.num_rows); + if(splits.has_value()){ + printf("------------\nSplits\n"); + for(size_t idx=0; idxsize(); idx++){ + printf("{%lu, %lu}\n", splits.value()[idx].skip_rows, splits.value()[idx].num_rows); + } + } + + printf("------------\nCumulative sizes %s\n", label.c_str()); + for(size_t idx=0; idxbegin(), [](gpu::chunk_read_info const& i) { return i.skip_rows; }); + auto end = start + splits->size(); + auto split = 
std::find(start, end, sizes[idx].row_count); + auto const split_index = [&]() -> int { + if(split != end && ((idx == sizes.size() - 1) || (sizes[idx+1].row_count > sizes[idx].row_count))){ + return static_cast(std::distance(start, split)); + } + return idx == 0 ? 0 : -1; + }(); + if(split_index >= 0){ + printf(" <-- split {%lu, %lu}", splits.value()[split_index].skip_rows, splits.value()[split_index].num_rows); + } + } + printf("\n"); } } #endif // PREPROCESS_DEBUG -struct cumulative_row_info { - size_t row_count; // cumulative row count - size_t size_bytes; // cumulative size in bytes - int key; // schema index -}; struct cumulative_row_sum { cumulative_row_info operator() __device__(cumulative_row_info const& a, cumulative_row_info const& b) const @@ -836,6 +889,11 @@ template <> __device__ size_t row_size_functor::operator()(size_t num_rows, bool nullable) { auto const offset_size = sizeof(offset_type); + // NOTE: Adding the + 1 offset here isn't strictly correct. There will only be 1 extra offset + // for the entire column, whereas this is adding an extra offset per page. So we will get a + // small over-estimate of the real size of the order : # of pages * 4 bytes. It seems better + // to overestimate size somewhat than to underestimate it and potentially generate chunks + // that are too large. return (offset_size * (num_rows + 1)) + validity_size(num_rows, nullable); } @@ -851,6 +909,7 @@ __device__ size_t row_size_functor::operator()(size_t num_rows, boo // only returns the size of offsets and validity. the size of the actual string chars // is tracked seperately. auto const offset_size = sizeof(offset_type); + // see note about offsets in the list_view template. return (offset_size * (num_rows + 1)) + validity_size(num_rows, nullable); } @@ -878,7 +937,8 @@ struct get_cumulative_row_info { size_t const row_count = static_cast(page.nesting[0].size); return {row_count, - thrust::reduce(thrust::seq, iter, iter + page.num_nesting_levels) + page.str_bytes}; + thrust::reduce(thrust::seq, iter, iter + page.num_nesting_levels) + page.str_bytes, + page.src_col_schema}; } }; @@ -898,11 +958,9 @@ struct row_total_size { 0, [&] __device__(size_type i) { return c_info[start + i].row_count; }); auto const page_index = (thrust::lower_bound(thrust::seq, iter, iter + (end - start), i.row_count) - iter) + start; - // printf("KI(%d): start(%d), end(%d), page_index(%d), size_bytes(%lu)\n", idx, start, end, - // (int)page_index, c_info[page_index].size_bytes); sum += c_info[page_index].size_bytes; } - return {i.row_count, sum}; + return {i.row_count, sum, i.key}; } }; @@ -952,11 +1010,12 @@ std::vector find_splits(std::vector c auto const start_row = cur_row_count; cur_row_count = sizes[p].row_count; splits.push_back(gpu::chunk_read_info{start_row, cur_row_count - start_row}); - // printf("Split: {%lu, %lu}\n", splits.back().skip_rows, splits.back().num_rows); cur_pos = p; cur_cumulative_size = sizes[p].size_bytes; } - } + } + // print_cumulative_row_info(sizes, "adjusted", splits); + return splits; } @@ -980,31 +1039,8 @@ std::vector compute_splits(hostdevice_vector h_page_index(pages.size()); - cudaMemcpyAsync(h_page_index.data(), page_index.begin(), sizeof(int) * pages.size(), cudaMemcpyDeviceToHost, stream); - stream.synchronize(); - for(size_t idx=0; idx h_c_info(page_keys.size()); - cudaMemcpy(h_c_info.data(), c_info.data(), sizeof(cumulative_row_info) * page_keys.size(), cudaMemcpyDeviceToHost); - for(size_t idx=0; idx c_info_sorted{c_info, stream}; @@ -1015,6 +1051,10 @@ std::vector 
compute_splits(hostdevice_vector h_c_info_sorted(c_info_sorted.size()); + cudaMemcpy(h_c_info_sorted.data(), c_info_sorted.data(), sizeof(cumulative_row_info) * c_info_sorted.size(), cudaMemcpyDeviceToHost); + // print_cumulative_row_info(h_c_info_sorted, "raw"); + // generate key offsets (offsets to the start of each partition of keys). worst case is 1 page per // key rmm::device_uvector key_offsets(page_keys.size() + 1, stream); @@ -1025,29 +1065,8 @@ std::vector compute_splits(hostdevice_vector h_key_offsets(num_unique_keys); - cudaMemcpy(h_key_offsets.data(), key_offsets.data(), sizeof(size_type) * num_unique_keys, cudaMemcpyDeviceToHost); - for(size_t idx=0; idx compute_splits(hostdevice_vectorchunk_row = v; } }; +struct start_offset_output_iterator { + gpu::PageInfo* pages; + int const* page_indices; + int cur_index; + int src_col_schema; + int nesting_depth; + int empty = 0; + using value_type = size_type; + using difference_type = size_type; + using pointer = size_type*; + using reference = size_type&; + using iterator_category = thrust::output_device_iterator_tag; + + __host__ __device__ void operator=(start_offset_output_iterator const& other) + { + pages = other.pages; + page_indices = other.page_indices; + cur_index = other.cur_index; + src_col_schema = other.src_col_schema; + nesting_depth = other.nesting_depth; + } + + __host__ __device__ start_offset_output_iterator operator+(int i) + { + return start_offset_output_iterator{ + pages, page_indices, cur_index + i, src_col_schema, nesting_depth}; + } + + __host__ __device__ void operator++() { cur_index++; } + + __device__ reference operator[](int i) { return dereference(cur_index + i); } + __device__ reference operator*() { return dereference(cur_index); } + + private: + __device__ reference dereference(int index) + { + gpu::PageInfo const& p = pages[page_indices[index]]; + if (p.src_col_schema != src_col_schema || p.flags & gpu::PAGEINFO_FLAGS_DICTIONARY) { + return empty; + } + return p.nesting[nesting_depth].page_start_value; + } +}; + } // anonymous namespace void reader::impl::compute_chunk_read_info(hostdevice_vector& chunks, @@ -1271,4 +1342,110 @@ void reader::impl::compute_chunk_read_info(hostdevice_vector{{skip_rows, num_rows}}; } +void reader::impl::allocate_columns(hostdevice_vector& chunks, + hostdevice_vector& pages, + gpu::chunk_intermediate_data const& id, + size_t skip_rows, + size_t num_rows, + bool uses_custom_row_bounds) +{ + // computes: + // PageNestingInfo::size for each level of nesting, for each page, taking row bounds into account. + // PageInfo::skipped_values, which tells us where to start decoding in the input to respect the + // user bounds. + // It is only necessary to do this second pass if uses_custom_row_bounds is set (if the user has + // specified artifical bounds). + if (uses_custom_row_bounds) { + gpu::ComputePageSizes(pages, + chunks, + skip_rows, + num_rows, + false, // num_rows is already computed + false, // no need to compute string sizes + _stream); + // print_pages(pages, _stream); + } + + // iterate over all input columns and allocate any associated output + // buffers if they are not part of a list hierarchy. mark down + // if we have any list columns that need further processing. 
+ bool has_lists = false; + for (size_t idx = 0; idx < _input_columns.size(); idx++) { + auto const& input_col = _input_columns[idx]; + size_t const max_depth = input_col.nesting_depth(); + + auto* cols = &_output_buffers; + for (size_t l_idx = 0; l_idx < max_depth; l_idx++) { + auto& out_buf = (*cols)[input_col.nesting[l_idx]]; + cols = &out_buf.children; + + // if this has a list parent, we will have to do further work in gpu::PreprocessColumnData + // to know how big this buffer actually is. + if (out_buf.user_data & PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT) { + has_lists = true; + } + // if we haven't already processed this column because it is part of a struct hierarchy + else if (out_buf.size == 0) { + // add 1 for the offset if this is a list column + out_buf.create( + out_buf.type.id() == type_id::LIST && l_idx < max_depth ? num_rows + 1 : num_rows, + _stream, + _mr); + } + } + } + + // compute output column sizes by examining the pages of the -input- columns + if (has_lists) { + auto& page_keys = _chunk_itm_data.page_keys; + auto& page_index = _chunk_itm_data.page_index; + for (size_t idx = 0; idx < _input_columns.size(); idx++) { + auto const& input_col = _input_columns[idx]; + auto src_col_schema = input_col.schema_idx; + size_t max_depth = input_col.nesting_depth(); + + auto* cols = &_output_buffers; + for (size_t l_idx = 0; l_idx < input_col.nesting_depth(); l_idx++) { + auto& out_buf = (*cols)[input_col.nesting[l_idx]]; + cols = &out_buf.children; + + // size iterator. indexes pages by sorted order + auto size_input = thrust::make_transform_iterator( + page_index.begin(), + get_page_nesting_size{src_col_schema, static_cast(l_idx), pages.device_ptr()}); + + // if this buffer is part of a list hierarchy, we need to determine it's + // final size and allocate it here. + // + // for struct columns, higher levels of the output columns are shared between input + // columns. so don't compute any given level more than once. + if ((out_buf.user_data & PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT) && out_buf.size == 0) { + int size = + thrust::reduce(rmm::exec_policy(_stream), size_input, size_input + pages.size()); + + // if this is a list column add 1 for non-leaf levels for the terminating offset + if (out_buf.type.id() == type_id::LIST && l_idx < max_depth) { size++; } + + // allocate + out_buf.create(size, _stream, _mr); + } + + // for nested hierarchies, compute per-page start offset + if (input_col.has_repetition) { + thrust::exclusive_scan_by_key( + rmm::exec_policy(_stream), + page_keys.begin(), + page_keys.end(), + size_input, + start_offset_output_iterator{pages.device_ptr(), + page_index.begin(), + 0, + static_cast(src_col_schema), + static_cast(l_idx)}); + } + } + } + } +} + } // namespace cudf::io::detail::parquet From 321815d249e9d0b5af9662b0c44beee055ad2d86 Mon Sep 17 00:00:00 2001 From: Dave Baranec Date: Mon, 31 Oct 2022 10:54:35 -0500 Subject: [PATCH 124/162] Fleshed out list-of-structs and struct-of-lists tests. 
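
The new cases cover hierarchies where lists and structs nest inside one another, read back
with several different chunk limits. As a rough, illustrative-only sketch (not the code used
in parquet_chunked_reader_test.cpp, which generates tens of thousands of rows so that each
column spans many pages), a small struct-of-lists column can be assembled with the cudf test
wrappers, for example:

  #include <cudf_test/column_wrapper.hpp>

  // Illustrative values only; the real tests build much larger data so that rows
  // are spread across many pages per column.
  cudf::test::lists_column_wrapper<int32_t> lists{{1, 2}, {3}, {4, 5}, {6, 7, 8}};
  cudf::test::fixed_width_column_wrapper<int32_t> ints{10, 20, 30, 40};
  cudf::test::structs_column_wrapper struct_of_lists{{lists, ints}};

The expected chunk counts asserted below are taken from the cumulative page sizes reported
by print_cumulative_row_info() in reader_impl_preprocess.cu -> find_splits().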
--- cpp/tests/io/parquet_chunked_reader_test.cpp | 89 ++++++++++++++++++-- 1 file changed, 84 insertions(+), 5 deletions(-) diff --git a/cpp/tests/io/parquet_chunked_reader_test.cpp b/cpp/tests/io/parquet_chunked_reader_test.cpp index 034b1281606..67f8e94f91b 100644 --- a/cpp/tests/io/parquet_chunked_reader_test.cpp +++ b/cpp/tests/io/parquet_chunked_reader_test.cpp @@ -696,17 +696,58 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithStructsOfLists) // Other tests: + // for these tests, different columns get written to different numbers of pages so it's a + // little tricky to describe the expected results by page counts. To get an idea of how + // these values are chosen, see the debug output from the call to print_cumulative_row_info() in + // reader_impl_preprocess.cu -> find_splits() + { - auto const [result, num_chunks] = chunked_read(filepath_no_null, 500'000); - EXPECT_EQ(num_chunks, 10); + auto const [result, num_chunks] = chunked_read(filepath_no_null, 1'000'000); + EXPECT_EQ(num_chunks, 7); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { - auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 500'000); + auto const [result, num_chunks] = chunked_read(filepath_no_null, 1'500'000); + EXPECT_EQ(num_chunks, 4); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } + + { + auto const [result, num_chunks] = chunked_read(filepath_no_null, 2'000'000); + EXPECT_EQ(num_chunks, 4); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } + + { + auto const [result, num_chunks] = chunked_read(filepath_no_null, 5'000'000); + EXPECT_EQ(num_chunks, 2); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } + + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1'000'000); EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } + + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1'500'000); + EXPECT_EQ(num_chunks, 5); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); + } + + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 2'000'000); + EXPECT_EQ(num_chunks, 3); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); + } + + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 5'000'000); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); + } } TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithListsOfStructs) @@ -793,12 +834,32 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithListsOfStructs) CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } - // Other tests: - + // for these tests, different columns get written to different numbers of pages so it's a + // little tricky to describe the expected results by page counts. 
To get an idea of how + // these values are chosen, see the debug output from the call to print_cumulative_row_info() in + // reader_impl_preprocess.cu -> find_splits() { auto const [result, num_chunks] = chunked_read(filepath_no_null, 1'000'000); EXPECT_EQ(num_chunks, 7); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } + + { + auto const [result, num_chunks] = chunked_read(filepath_no_null, 1'500'000); + EXPECT_EQ(num_chunks, 4); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } + + { + auto const [result, num_chunks] = chunked_read(filepath_no_null, 2'000'000); + EXPECT_EQ(num_chunks, 4); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); + } + + { + auto const [result, num_chunks] = chunked_read(filepath_no_null, 5'000'000); + EXPECT_EQ(num_chunks, 2); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); } { @@ -806,4 +867,22 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithListsOfStructs) EXPECT_EQ(num_chunks, 5); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); } + + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1'500'000); + EXPECT_EQ(num_chunks, 4); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); + } + + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 2'000'000); + EXPECT_EQ(num_chunks, 3); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); + } + + { + auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 5'000'000); + EXPECT_EQ(num_chunks, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result); + } } From 8cf95e8707ded080168feab95b2e0e9e7edeaa56 Mon Sep 17 00:00:00 2001 From: Dave Baranec Date: Mon, 31 Oct 2022 12:02:43 -0500 Subject: [PATCH 125/162] Docs and cleanup. --- cpp/src/io/parquet/page_data.cu | 152 ++++++++---------- cpp/src/io/parquet/parquet_gpu.hpp | 51 +++--- cpp/src/io/parquet/reader_impl.cu | 2 +- cpp/src/io/parquet/reader_impl.hpp | 2 +- cpp/src/io/parquet/reader_impl_preprocess.cu | 159 +++++++++++++------ cpp/tests/io/parquet_chunked_reader_test.cpp | 14 +- 6 files changed, 211 insertions(+), 169 deletions(-) diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index 9c25fe1e7a1..56351d7d45b 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -56,6 +56,8 @@ namespace io { namespace parquet { namespace gpu { +namespace { + struct page_state_s { const uint8_t* data_start; const uint8_t* data_end; @@ -287,13 +289,15 @@ __device__ void gpuDecodeStream( * * @return The new output position */ -template -__device__ std::pair gpuDecodeDictionaryIndices(volatile page_state_s* s, int target_pos, int t) +template +__device__ std::pair gpuDecodeDictionaryIndices(volatile page_state_s* s, + int target_pos, + int t) { const uint8_t* end = s->data_end; int dict_bits = s->dict_bits; int pos = s->dict_pos; - int str_len = 0; + int str_len = 0; while (pos < target_pos) { int is_literal, batch_len; @@ -338,7 +342,7 @@ __device__ std::pair gpuDecodeDictionaryIndices(volatile page_state_s* __syncwarp(); is_literal = shuffle(is_literal); batch_len = shuffle(batch_len); - + int len = 0; if (t < batch_len) { int dict_idx = s->dict_val; @@ -361,20 +365,18 @@ __device__ std::pair gpuDecodeDictionaryIndices(volatile page_state_s* dict_idx &= (1 << dict_bits) - 1; } } - + // if we're computing indices, store it off. - if constexpr(sizes_only){ - len = [&](){ - // we may end up decoding more indices than we asked for. 
so don't include those in the size - // calculation - if(pos + t >= target_pos){ - return 0; - } + if constexpr (sizes_only) { + len = [&]() { + // we may end up decoding more indices than we asked for. so don't include those in the + // size calculation + if (pos + t >= target_pos) { return 0; } // TODO: refactor this with gpuGetStringData / gpuGetStringSize uint32_t const dict_pos = (s->dict_bits > 0) ? dict_idx * sizeof(string_index_pair) : 0; if (target_pos && dict_pos < (uint32_t)s->dict_size) { const auto* src = reinterpret_cast(s->dict_base + dict_pos); - return src->second; + return src->second; } return 0; }(); @@ -384,7 +386,7 @@ __device__ std::pair gpuDecodeDictionaryIndices(volatile page_state_s* } // if we're computing sizes, sum it - if constexpr(sizes_only){ + if constexpr (sizes_only) { typedef cub::WarpReduce WarpReduce; __shared__ typename WarpReduce::TempStorage temp_storage; // note: str_len will only be valid on thread 0. @@ -469,9 +471,9 @@ __device__ int gpuDecodeRleBooleans(volatile page_state_s* s, int target_pos, in * @return The new output position */ __device__ size_type gpuInitStringDescriptors(volatile page_state_s* s, int target_pos, int t) -{ - int pos = s->dict_pos; - int total_len = 0; +{ + int pos = s->dict_pos; + int total_len = 0; // This step is purely serial if (!t) { @@ -529,21 +531,6 @@ inline __device__ std::pair gpuGetStringData(volatile page_ return {ptr, len}; } -/** - * @brief Get the length of a string - * - * @param[in,out] s Page state input/output - * @param[in] src_pos Source position - * - * @return The length of the string - */ -inline __device__ size_t gpuGetStringSize(volatile page_state_s* s, int src_pos) -{ - if (s->dtype_len == 4) { return 4; } - auto [_, len] = gpuGetStringData(s, src_pos); - return len; -} - /** * @brief Output a string descriptor * @@ -983,22 +970,24 @@ static __device__ bool setupLocalPageInfo(page_state_s* const s, s->dtype_len = 8; // Convert to 64-bit timestamp } - // NOTE: s->page.num_rows, s->col.chunk_row, s->first_row and s->num_rows will be + // NOTE: s->page.num_rows, s->col.chunk_row, s->first_row and s->num_rows will be // invalid/bogus during first pass of the preprocess step for nested types. this is ok // because we ignore these values in that stage. - { + { auto const max_row = min_row + num_rows; - + // if we are totally outside the range of the input, do nothing - if((page_start_row > max_row) || (page_start_row + s->page.num_rows < min_row)){ + if ((page_start_row > max_row) || (page_start_row + s->page.num_rows < min_row)) { s->first_row = 0; - s->num_rows = 0; + s->num_rows = 0; } // otherwise else { - s->first_row = page_start_row >= min_row ? 0 : min_row - page_start_row; + s->first_row = page_start_row >= min_row ? 0 : min_row - page_start_row; auto const max_page_rows = s->page.num_rows - s->first_row; - s->num_rows = (page_start_row + s->first_row) + max_page_rows <= max_row ? max_page_rows : max_row - (page_start_row + s->first_row); + s->num_rows = (page_start_row + s->first_row) + max_page_rows <= max_row + ? max_page_rows + : max_row - (page_start_row + s->first_row); } } @@ -1136,9 +1125,10 @@ static __device__ bool setupLocalPageInfo(page_state_s* const s, if (decode_step) { s->input_value_count = s->page.skipped_values > -1 ? 
s->page.skipped_values : 0; } else { - s->input_value_count = 0; - s->input_leaf_count = 0; - s->page.skipped_values = -1; // magic number to indicate it hasn't been set for use inside UpdatePageSizes + s->input_value_count = 0; + s->input_leaf_count = 0; + s->page.skipped_values = + -1; // magic number to indicate it hasn't been set for use inside UpdatePageSizes s->page.skipped_leaf_values = 0; } } @@ -1473,7 +1463,7 @@ static __device__ void gpuUpdatePageSizes(page_state_s* s, int input_leaf_count = s->input_leaf_count; // how many rows we've processed in the page so far int input_row_count = s->input_row_count; - + // how many valid leaves we've processed // int input_leaf_valid_count = s->input_leaf_valid_count; @@ -1490,7 +1480,7 @@ static __device__ void gpuUpdatePageSizes(page_state_s* s, // is this thread within row bounds? on the first pass we don't know the bounds, so we will be // computing the full size of the column. on the second pass, we will know our actual row // bounds, so the computation will cap sizes properly. - int in_row_bounds = 1; + int in_row_bounds = 1; if (bounds_set) { // absolute row index int32_t thread_row_index = @@ -1522,9 +1512,7 @@ static __device__ void gpuUpdatePageSizes(page_state_s* s, int const in_nesting_bounds = (s_idx >= start_depth && s_idx <= end_depth && in_row_bounds) ? 1 : 0; uint32_t const count_mask = ballot(in_nesting_bounds); - if (!t) { - pni->size += __popc(count_mask); - } + if (!t) { pni->size += __popc(count_mask); } /* if (s_idx == max_depth - 1) { @@ -1545,23 +1533,23 @@ static __device__ void gpuUpdatePageSizes(page_state_s* s, s->input_value_count = target_input_value_count; s->input_leaf_count = input_leaf_count; // s->input_leaf_valid_count = input_leaf_valid_count; - s->input_row_count = input_row_count; + s->input_row_count = input_row_count; } } __device__ size_type gpuGetStringSizes(page_state_s* s, int target_count, int t) -{ - auto dict_target_pos = target_count; - size_type str_len = 0; +{ + auto dict_target_pos = target_count; + size_type str_len = 0; if (s->dict_base) { - auto const[new_target_pos, len] = gpuDecodeDictionaryIndices(s, target_count, t); - dict_target_pos = new_target_pos; - str_len = len; + auto const [new_target_pos, len] = gpuDecodeDictionaryIndices(s, target_count, t); + dict_target_pos = new_target_pos; + str_len = len; } else if ((s->col.data_type & 7) == BYTE_ARRAY) { - str_len = gpuInitStringDescriptors(s, target_count, t); + str_len = gpuInitStringDescriptors(s, target_count, t); } if (!t) { *(volatile int32_t*)&s->dict_pos = dict_target_pos; } - return str_len; + return str_len; } /** @@ -1588,24 +1576,25 @@ __global__ void __launch_bounds__(block_size) __shared__ __align__(16) page_state_s state_g; page_state_s* const s = &state_g; - int page_idx = blockIdx.x; + int page_idx = blockIdx.x; int t = threadIdx.x; PageInfo* pp = &pages[page_idx]; - if (!setupLocalPageInfo(s, pp, chunks, min_row, num_rows, false)) { - return; - } + if (!setupLocalPageInfo(s, pp, chunks, min_row, num_rows, false)) { return; } // we only need to preprocess hierarchies with repetition in them (ie, hierarchies // containing lists anywhere within). 
- bool const has_repetition = chunks[pp->chunk_idx].max_level[level_type::REPETITION] > 0; - compute_string_sizes = compute_string_sizes && ((s->col.data_type & 7) == BYTE_ARRAY && s->dtype_len != 4); + bool const has_repetition = chunks[pp->chunk_idx].max_level[level_type::REPETITION] > 0; + compute_string_sizes = + compute_string_sizes && ((s->col.data_type & 7) == BYTE_ARRAY && s->dtype_len != 4); // reasons we might want to early out: - // - if this is a flat hierarchy (no lists) and is not a string column. in this case we don't need to do - // the expensive work of traversing the level data to determine sizes. we can just compute it directly. + // - if this is a flat hierarchy (no lists) and is not a string column. in this case we don't need + // to do + // the expensive work of traversing the level data to determine sizes. we can just compute it + // directly. // - if this is the trim pass and we have no rows to output for this page. - if (!has_repetition && !compute_string_sizes) { + if (!has_repetition && !compute_string_sizes) { if (!t) { // note: doing this for all nesting levels because we can still have structs even if we don't // have lists. @@ -1637,9 +1626,7 @@ __global__ void __launch_bounds__(block_size) } } // if we have no work to do for this page. - if(!compute_num_rows_pass && s->num_rows == 0){ - return; - } + if (!compute_num_rows_pass && s->num_rows == 0) { return; } __syncthreads(); // optimization : it might be useful to have a version of gpuDecodeStream that could go wider than @@ -1662,29 +1649,24 @@ __global__ void __launch_bounds__(block_size) int actual_input_count = has_repetition ? min(s->lvl_count[level_type::REPETITION], s->lvl_count[level_type::DEFINITION]) : s->lvl_count[level_type::DEFINITION]; - + // process what we got back gpuUpdatePageSizes(s, actual_input_count, t, !compute_num_rows_pass); if (compute_string_sizes) { auto const str_len = gpuGetStringSizes(s, s->input_leaf_count, t); - if(!t){ - s->page.str_bytes += str_len; - } + if (!t) { s->page.str_bytes += str_len; } } - target_input_count = actual_input_count + batch_size; + target_input_count = actual_input_count + batch_size; __syncwarp(); } } // update # rows in the actual page if (!t) { - if(compute_num_rows_pass){ - pp->num_rows = s->page.nesting[0].size; - } + if (compute_num_rows_pass) { pp->num_rows = s->page.nesting[0].size; } pp->skipped_values = s->page.skipped_values; pp->skipped_leaf_values = s->page.skipped_leaf_values; pp->str_bytes = s->page.str_bytes; - // printf("STR BYTES: %d\n", s->page.str_bytes); } } @@ -1714,9 +1696,7 @@ __global__ void __launch_bounds__(block_size) gpuDecodePageData( if (!setupLocalPageInfo(s, &pages[page_idx], chunks, min_row, num_rows, true)) { return; } // if we have no rows to do (eg, in a skip_rows/num_rows case) - if(s->num_rows == 0){ - return; - } + if (s->num_rows == 0) { return; } if (s->dict_base) { out_thread0 = (s->dict_bits > 0) ? 
64 : 32; @@ -1753,7 +1733,8 @@ __global__ void __launch_bounds__(block_size) gpuDecodePageData( // WARP1: Decode dictionary indices, booleans or string positions if (s->dict_base) { - auto const[new_target_pos, _] = gpuDecodeDictionaryIndices(s, src_target_pos, t & 0x1f); + auto const [new_target_pos, _] = + gpuDecodeDictionaryIndices(s, src_target_pos, t & 0x1f); src_target_pos = new_target_pos; } else if ((s->col.data_type & 7) == BOOLEAN) { src_target_pos = gpuDecodeRleBooleans(s, src_target_pos, t & 0x1f); @@ -1836,11 +1817,16 @@ __global__ void __launch_bounds__(block_size) gpuDecodePageData( } } +} // anonymous namespace + +/** + * @copydoc cudf::io::parquet::gpu::ComputePageSizes + */ void ComputePageSizes(hostdevice_vector& pages, hostdevice_vector const& chunks, size_t min_row, size_t num_rows, - bool trim_pass, + bool compute_num_rows, bool compute_string_sizes, rmm::cuda_stream_view stream) { @@ -1853,7 +1839,7 @@ void ComputePageSizes(hostdevice_vector& pages, // If uses_custom_row_bounds is set to true, we have to do a second pass later that "trims" // the starting and ending read values to account for these bounds. gpuComputePageSizes<<>>( - pages.device_ptr(), chunks, min_row, num_rows, trim_pass, compute_string_sizes); + pages.device_ptr(), chunks, min_row, num_rows, compute_num_rows, compute_string_sizes); } /** diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index 060d46c1ced..b7564684e27 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -445,45 +445,36 @@ void BuildStringDictionaryIndex(ColumnChunkDesc* chunks, int32_t num_chunks, rmm::cuda_stream_view stream); -void ComputePageSizes(hostdevice_vector& pages, - hostdevice_vector const& chunks, - size_t num_rows, - size_t min_row, - bool trim_pass, - bool compute_string_sizes, - rmm::cuda_stream_view stream); - /** - * @brief Preprocess column information for nested schemas. + * @brief Compute page output size information. + * + * When dealing with nested hierarchies (those that contain lists), or when doing a chunked + * read, we need to obtain more information up front than we have with just the row counts. * - * There are several pieces of information we can't compute directly from row counts in - * the parquet headers when dealing with nested schemas. - * - The total sizes of all output columns at all nesting levels - * - The starting output buffer offset for each page, for each nesting level - * For flat schemas, these values are computed during header decoding (see gpuDecodePageHeaders) + * - We need to determine the sizes of each output cudf column per page + * - We need to determine information about where to start decoding the value stream + * if we are using custom user bounds (skip_rows / num_rows) + * - We need to determine actual number of top level rows per page + * - If we are doing a chunked read, we need to determine the total string size per page * - * Note : this function is where output device memory is allocated for nested columns. 
* * @param pages All pages to be decoded * @param chunks All chunks to be decoded - * @param input_columns Input column information - * @param output_columns Output column information * @param num_rows Maximum number of rows to read * @param min_rows crop all rows below min_row - * @param uses_custom_row_bounds Whether or not num_rows and min_rows represents user-specific - * bounds - * @param stream Cuda stream + * @param compute_num_rows If set to true, the num_rows field in PageInfo will be + * computed + * @param compute_string_sizes If set to true, the str_bytes field in PageInfo will + * be computed + * @param stream CUDA stream to use, default 0 */ -/* -void PreprocessColumnData(hostdevice_vector& pages, - hostdevice_vector const& chunks, - std::vector& input_columns, - std::vector& output_columns, - size_t num_rows, - size_t min_row, - bool uses_custom_row_bounds, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr);*/ +void ComputePageSizes(hostdevice_vector& pages, + hostdevice_vector const& chunks, + size_t num_rows, + size_t min_row, + bool compute_num_rows, + bool compute_string_sizes, + rmm::cuda_stream_view stream); /** * @brief Launches kernel for reading the column data stored in the pages diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 2e44a0dc87c..cd719c05141 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -161,7 +161,7 @@ void reader::impl::decode_page_data(hostdevice_vector& chu // for list columns, add the final offset to every offset buffer. // TODO : make this happen in more efficiently. Maybe use thrust::for_each - // on each buffer. Or potentially do it in PreprocessColumnData + // on each buffer. // Note : the reason we are doing this here instead of in the decode kernel is // that it is difficult/impossible for a given page to know that it is writing the very // last value that should then be followed by a terminator (because rows can span diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index bd22abe393c..1b1c729f592 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -151,7 +151,7 @@ class reader::impl { size_t skip_rows, size_t num_rows, bool uses_custom_row_bounds, - size_type chunked_read_size); + size_t chunked_read_size); /** * @brief Allocate nesting information storage for all pages and set pointers to it. 
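
For reference, the reworked ComputePageSizes is invoked from the reader in two modes, matching
the description above. A condensed sketch of the two call sites (the full versions appear in
reader_impl_preprocess.cu; `pages`, `chunks`, `chunk_read_limit`, `skip_rows`, `num_rows` and
`_stream` are the surrounding reader state, and the numeric_limits template argument is an
assumption here):

  // Pass 1 (compute_chunk_read_info): no row bounds applied yet, so scan the whole file.
  // Fills in per-page row counts and, when a chunk limit is set, per-page string sizes.
  gpu::ComputePageSizes(pages,
                        chunks,
                        0,                                   // min_row
                        std::numeric_limits<size_t>::max(),  // num_rows
                        true,                                // compute_num_rows
                        chunk_read_limit > 0,                // compute_string_sizes
                        _stream);

  // Pass 2 (allocate_columns, only when uses_custom_row_bounds is set): trim the nesting
  // sizes and skipped_values to the user-supplied bounds; row counts are already known.
  gpu::ComputePageSizes(pages, chunks, skip_rows, num_rows, false, false, _stream);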
diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index 52c939178ad..d3b8fb5d5ec 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -774,7 +774,7 @@ namespace { struct cumulative_row_info { size_t row_count; // cumulative row count - size_t size_bytes; // cumulative size in bytes + size_t size_bytes; // cumulative size in bytes int key; // schema index }; @@ -798,32 +798,35 @@ void print_pages(hostdevice_vector& pages, rmm::cuda_stream_view } void print_cumulative_page_info(hostdevice_vector& pages, - rmm::device_uvector const& page_index, + rmm::device_uvector const& page_index, rmm::device_uvector const& c_info, rmm::cuda_stream_view stream) -{ +{ pages.device_to_host(stream, true); printf("------------\nCumulative sizes by page\n"); std::vector schemas(pages.size()); std::vector h_page_index(pages.size()); - cudaMemcpy(h_page_index.data(), page_index.data(), sizeof(int) * pages.size(), cudaMemcpyDeviceToHost); + cudaMemcpy( + h_page_index.data(), page_index.data(), sizeof(int) * pages.size(), cudaMemcpyDeviceToHost); std::vector h_cinfo(pages.size()); - cudaMemcpy(h_cinfo.data(), c_info.data(), sizeof(cumulative_row_info) * pages.size(), cudaMemcpyDeviceToHost); - auto schema_iter = cudf::detail::make_counting_transform_iterator(0, [&](size_type i){ - return pages[h_page_index[i]].src_col_schema; - }); + cudaMemcpy(h_cinfo.data(), + c_info.data(), + sizeof(cumulative_row_info) * pages.size(), + cudaMemcpyDeviceToHost); + auto schema_iter = cudf::detail::make_counting_transform_iterator( + 0, [&](size_type i) { return pages[h_page_index[i]].src_col_schema; }); thrust::copy(thrust::seq, schema_iter, schema_iter + pages.size(), schemas.begin()); auto last = thrust::unique(thrust::seq, schemas.begin(), schemas.end()); schemas.resize(last - schemas.begin()); printf("Num schemas: %lu\n", schemas.size()); - - for(size_t idx=0; idx& pages, } } -void print_cumulative_row_info(std::vector const& sizes, std::string const& label, std::optional> splits = std::nullopt) +void print_cumulative_row_info( + std::vector const& sizes, + std::string const& label, + std::optional> splits = std::nullopt) { - if(splits.has_value()){ + if (splits.has_value()) { printf("------------\nSplits\n"); - for(size_t idx=0; idxsize(); idx++){ + for (size_t idx = 0; idx < splits->size(); idx++) { printf("{%lu, %lu}\n", splits.value()[idx].skip_rows, splits.value()[idx].num_rows); } } printf("------------\nCumulative sizes %s\n", label.c_str()); - for(size_t idx=0; idxbegin(), [](gpu::chunk_read_info const& i) { return i.skip_rows; }); - auto end = start + splits->size(); - auto split = std::find(start, end, sizes[idx].row_count); + auto start = thrust::make_transform_iterator( + splits->begin(), [](gpu::chunk_read_info const& i) { return i.skip_rows; }); + auto end = start + splits->size(); + auto split = std::find(start, end, sizes[idx].row_count); auto const split_index = [&]() -> int { - if(split != end && ((idx == sizes.size() - 1) || (sizes[idx+1].row_count > sizes[idx].row_count))){ + if (split != end && + ((idx == sizes.size() - 1) || (sizes[idx + 1].row_count > sizes[idx].row_count))) { return static_cast(std::distance(start, split)); - } + } return idx == 0 ? 
0 : -1; }(); - if(split_index >= 0){ - printf(" <-- split {%lu, %lu}", splits.value()[split_index].skip_rows, splits.value()[split_index].num_rows); + if (split_index >= 0) { + printf(" <-- split {%lu, %lu}", + splits.value()[split_index].skip_rows, + splits.value()[split_index].num_rows); } } printf("\n"); @@ -863,6 +873,9 @@ void print_cumulative_row_info(std::vector const& sizes, st } #endif // PREPROCESS_DEBUG +/** + * @brief Functor which reduces two cumulative_row_info structs of the same key. + */ struct cumulative_row_sum { cumulative_row_info operator() __device__(cumulative_row_info const& a, cumulative_row_info const& b) const @@ -871,6 +884,12 @@ struct cumulative_row_sum { } }; +/** + * @brief Functor which computes the total data size for a given type of cudf column. + * + * In the case of strings, the return size does not include the chars themselves. That + * information is tracked seperately (see PageInfo::str_bytes). + */ struct row_size_functor { __device__ size_t validity_size(size_t num_rows, bool nullable) { @@ -890,7 +909,7 @@ __device__ size_t row_size_functor::operator()(size_t num_rows, bool { auto const offset_size = sizeof(offset_type); // NOTE: Adding the + 1 offset here isn't strictly correct. There will only be 1 extra offset - // for the entire column, whereas this is adding an extra offset per page. So we will get a + // for the entire column, whereas this is adding an extra offset per page. So we will get a // small over-estimate of the real size of the order : # of pages * 4 bytes. It seems better // to overestimate size somewhat than to underestimate it and potentially generate chunks // that are too large. @@ -913,6 +932,12 @@ __device__ size_t row_size_functor::operator()(size_t num_rows, boo return (offset_size * (num_rows + 1)) + validity_size(num_rows, nullable); } +/** + * @brief Functor which computes the total output cudf data size for all of + * the data in this page. + * + * Sums across all nesting levels. + */ struct get_cumulative_row_info { gpu::PageInfo const* const pages; @@ -942,6 +967,21 @@ struct get_cumulative_row_info { } }; +/** + * @brief Functor which computes the effective size of all input columns by page. + * + * For a given row, we want to find the cost of all pages for all columns involved + * in loading up to that row. The complication here is that not all pages are the + * same size between columns. Example: + * + * page row counts + * Column A: 0 <----> 100 <----> 200 + * Column B: 0 <---------------> 200 <--------> 400 + | + * if we decide to split at row 100, we don't really know the actual amount of bytes in column B + * at that point. So we have to proceed as if we are taking the bytes from all 200 rows of that + * page. Essentially, a conservative over-estimate of the real size. + */ struct row_total_size { cumulative_row_info const* const c_info; size_type const* const key_offsets; @@ -964,14 +1004,18 @@ struct row_total_size { } }; +/** + * @brief Given a vector of cumulative {row_count, byte_size} pairs and a chunk read + * limit, determine the set of splits. 
+ * + * @param sizes Vector of cumulative {row_count, byte_size} pairs + * @param num_rows Total number of rows to read + * @param chunked_read_size Limit on total number of bytes to be returned per read, for all columns + */ std::vector find_splits(std::vector const& sizes, - size_type num_rows, + size_t num_rows, size_t chunked_read_size) { - // for (auto x : sizes) { - // printf("size: %d | %d \n", (int)x.row_count, (int)x.size_bytes); - // } - // now we have an array of {row_count, real output bytes}. just walk through it and generate // splits. // TODO: come up with a clever way to do this entirely in parallel. For now, as long as batch @@ -985,15 +1029,13 @@ std::vector find_splits(std::vector c return i.size_bytes - cur_cumulative_size; }); auto end = start + sizes.size(); - while (cur_row_count < static_cast(num_rows)) { - int64_t p = thrust::lower_bound( - thrust::seq, start + cur_pos, end, static_cast(chunked_read_size)) - - start; + while (cur_row_count < num_rows) { + int64_t p = thrust::lower_bound(thrust::seq, start + cur_pos, end, chunked_read_size) - start; // if we're past the end, or if the returned bucket is > than the chunked_read_size, move back // one. if (static_cast(p) >= sizes.size() || - (sizes[p].size_bytes - cur_cumulative_size > static_cast(chunked_read_size))) { + (sizes[p].size_bytes - cur_cumulative_size > chunked_read_size)) { p--; } @@ -1013,16 +1055,27 @@ std::vector find_splits(std::vector c cur_pos = p; cur_cumulative_size = sizes[p].size_bytes; } - } + } // print_cumulative_row_info(sizes, "adjusted", splits); return splits; } +/** + * @brief Given a set of pages that have had their sizes computed by nesting level and + * a limit on total read size, generate a set of {skip_rows, num_rows} pairs representing + * a set of reads that will generate output columns of total size <= `chunked_read_size` bytes. + * + * @param pages All pages in the file + * @param id Additional intermediate information required to process the pages + * @param num_rows Total number of rows to read + * @param chunked_read_size Limit on total number of bytes to be returned per read, for all columns + * @param stream CUDA stream to use, default 0 + */ std::vector compute_splits(hostdevice_vector& pages, gpu::chunk_intermediate_data const& id, - size_type num_rows, - size_type chunked_read_size, + size_t num_rows, + size_t chunked_read_size, rmm::cuda_stream_view stream) { auto const& page_keys = id.page_keys; @@ -1039,7 +1092,7 @@ std::vector compute_splits(hostdevice_vector compute_splits(hostdevice_vector h_c_info_sorted(c_info_sorted.size()); - cudaMemcpy(h_c_info_sorted.data(), c_info_sorted.data(), sizeof(cumulative_row_info) * c_info_sorted.size(), cudaMemcpyDeviceToHost); + cudaMemcpy(h_c_info_sorted.data(), + c_info_sorted.data(), + sizeof(cumulative_row_info) * c_info_sorted.size(), + cudaMemcpyDeviceToHost); // print_cumulative_row_info(h_c_info_sorted, "raw"); // generate key offsets (offsets to the start of each partition of keys). worst case is 1 page per @@ -1109,6 +1165,9 @@ struct get_page_schema { __device__ size_type operator()(gpu::PageInfo const& page) { return page.src_col_schema; } }; +/** + * @brief Returns the size field of a PageInfo struct for a given depth, keyed by schema. 
+ */ struct get_page_nesting_size { size_type const src_col_schema; size_type const depth; @@ -1124,6 +1183,9 @@ struct get_page_nesting_size { } }; +/** + * @brief Writes to the chunk_row field of the PageInfo struct + */ struct chunk_row_output_iter { gpu::PageInfo* p; using value_type = size_type; @@ -1141,9 +1203,11 @@ struct chunk_row_output_iter { __device__ reference operator[](int i) { return p[i].chunk_row; } __device__ reference operator*() { return p->chunk_row; } - // __device__ void operator=(value_type v) { p->chunk_row = v; } }; +/** + * @brief Writes to the page_start_value field of the PageNestingInfo struct, keyed by schema. + */ struct start_offset_output_iterator { gpu::PageInfo* pages; int const* page_indices; @@ -1195,7 +1259,7 @@ void reader::impl::compute_chunk_read_info(hostdevice_vector 0 ? compute_splits(pages, _chunk_itm_data, num_rows, chunked_read_size, _stream) @@ -1379,8 +1444,8 @@ void reader::impl::allocate_columns(hostdevice_vector& chu auto& out_buf = (*cols)[input_col.nesting[l_idx]]; cols = &out_buf.children; - // if this has a list parent, we will have to do further work in gpu::PreprocessColumnData - // to know how big this buffer actually is. + // if this has a list parent, we have to get column sizes from the + // data computed during gpu::ComputePageSizes if (out_buf.user_data & PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT) { has_lists = true; } diff --git a/cpp/tests/io/parquet_chunked_reader_test.cpp b/cpp/tests/io/parquet_chunked_reader_test.cpp index 67f8e94f91b..e89c055b23f 100644 --- a/cpp/tests/io/parquet_chunked_reader_test.cpp +++ b/cpp/tests/io/parquet_chunked_reader_test.cpp @@ -698,7 +698,7 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithStructsOfLists) // for these tests, different columns get written to different numbers of pages so it's a // little tricky to describe the expected results by page counts. To get an idea of how - // these values are chosen, see the debug output from the call to print_cumulative_row_info() in + // these values are chosen, see the debug output from the call to print_cumulative_row_info() in // reader_impl_preprocess.cu -> find_splits() { @@ -836,25 +836,25 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithListsOfStructs) // for these tests, different columns get written to different numbers of pages so it's a // little tricky to describe the expected results by page counts. 
To get an idea of how - // these values are chosen, see the debug output from the call to print_cumulative_row_info() in - // reader_impl_preprocess.cu -> find_splits() + // these values are chosen, see the debug output from the call to print_cumulative_row_info() in + // reader_impl_preprocess.cu -> find_splits() { auto const [result, num_chunks] = chunked_read(filepath_no_null, 1'000'000); EXPECT_EQ(num_chunks, 7); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); - } - + } + { auto const [result, num_chunks] = chunked_read(filepath_no_null, 1'500'000); EXPECT_EQ(num_chunks, 4); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); - } + } { auto const [result, num_chunks] = chunked_read(filepath_no_null, 2'000'000); EXPECT_EQ(num_chunks, 4); CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result); - } + } { auto const [result, num_chunks] = chunked_read(filepath_no_null, 5'000'000); From 5bb755ed82c9120ee756df94b232dec1cf63add7 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 31 Oct 2022 10:35:44 -0700 Subject: [PATCH 126/162] Update Javadoc Signed-off-by: Nghia Truong --- .../ai/rapids/cudf/ParquetChunkedReader.java | 63 +++++++++++-------- 1 file changed, 38 insertions(+), 25 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java b/java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java index 5eeb850d300..6e93aa8e4e4 100644 --- a/java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java +++ b/java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java @@ -21,37 +21,43 @@ import java.io.File; /** - * TODO + * Provide an interface for reading a Parquet file in an iterative manner. */ public class ParquetChunkedReader implements AutoCloseable { static { NativeDepsLoader.loadNativeDeps(); } - - /** - * TODO + * Construct the reader instance from a read limit and a file path. + * + * @param chunkSizeByteLimit Limit on total number of bytes to be returned per read. + * @param filePath Full path of the input Parquet file to read. */ - public ParquetChunkedReader(long chunkSizeByteLimit, File path) { - this(chunkSizeByteLimit, ParquetOptions.DEFAULT, path); + public ParquetChunkedReader(long chunkSizeByteLimit, File filePath) { + this(chunkSizeByteLimit, ParquetOptions.DEFAULT, filePath); } /** - * TODO + * Construct the reader instance from a read limit, a ParquetOptions object, and a file path. + * + * @param chunkSizeByteLimit Limit on total number of bytes to be returned per read. + * @param opts The options for Parquet reading. + * @param filePath Full path of the input Parquet file to read. */ - public ParquetChunkedReader(long chunkSizeByteLimit, ParquetOptions opts, File path) { + public ParquetChunkedReader(long chunkSizeByteLimit, ParquetOptions opts, File filePath) { handle = create(chunkSizeByteLimit, opts.getIncludeColumnNames(), opts.getReadBinaryAsString(), - path.getAbsolutePath(), 0, 0, opts.timeUnit().typeId.getNativeId()); + filePath.getAbsolutePath(), 0, 0, opts.timeUnit().typeId.getNativeId()); } /** - * TODO - * @param chunkSizeByteLimit Byte limit (ex: 1MB=1048576) - * @param opts - * @param buffer - * @param offset - * @param len + * Construct the reader instance from a read limit and a file already read in a memory buffer. + * + * @param chunkSizeByteLimit Limit on total number of bytes to be returned per read. + * @param opts The options for Parquet reading. + * @param buffer Raw Parquet file content. + * @param offset The starting offset into buffer. 
+ * @param len The number of bytes to parse the given buffer. */ public ParquetChunkedReader(long chunkSizeByteLimit, ParquetOptions opts, HostMemoryBuffer buffer, long offset, long len) { @@ -60,14 +66,20 @@ public ParquetChunkedReader(long chunkSizeByteLimit, ParquetOptions opts, HostMe } /** - * TODO + * Check if the given file has anything left to read. + * + * @return A boolean value indicating if there is more data to read from file. */ public boolean hasNext() { return hasNext(handle); } /** - * TODO + * Read a chunk of rows in the given Parquet file such that the returning data has total size + * does not exceed the given read limit. If the given file has no data, or all data has been read + * before by previous calls to this function, a null Table will be returned. + * + * @return A table of new rows reading from the given file. */ public Table readChunk() { long[] columnPtrs = readChunk(handle); @@ -90,14 +102,15 @@ public void close() { private long handle; /** - * TODO - * @param chunkSizeByteLimit TODO - * @param filterColumnNames name of the columns to read, or an empty array if we want to read - * @param binaryToString whether to convert this column to String if binary - * @param filePath the path of the file to read, or null if no path should be read. - * @param bufferAddrs the address of the buffer to read from or 0 if we should not. - * @param length the length of the buffer to read from. - * @param timeUnit return type of TimeStamp in units + * Create a native chunked Parquet reader object on heap and return its memory address. + * + * @param chunkSizeByteLimit Limit on total number of bytes to be returned per read. + * @param filterColumnNames Name of the columns to read, or an empty array if we want to read all. + * @param binaryToString Whether to convert the corresponding column to String if it is binary. + * @param filePath Full path of the file to read, or given as null if reading from a buffer. + * @param bufferAddrs The address of a buffer to read from, or 0 if we are not using that buffer. + * @param length The length of the buffer to read from. + * @param timeUnit Return type of time unit for timestamps. */ private static native long create(long chunkSizeByteLimit, String[] filterColumnNames, boolean[] binaryToString, String filePath, long bufferAddrs, long length, int timeUnit); From af35c4d70a12c9ae75b38285323dc3aad7646762 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 31 Oct 2022 16:09:40 -0700 Subject: [PATCH 127/162] Update doxygen Signed-off-by: Nghia Truong --- cpp/include/cudf/io/detail/parquet.hpp | 19 +++++++++-- cpp/include/cudf/io/parquet.hpp | 27 +++++++++------- cpp/src/io/parquet/reader_impl.hpp | 34 +++++++++++++++----- cpp/src/io/parquet/reader_impl_preprocess.cu | 33 +++++++++---------- 4 files changed, 73 insertions(+), 40 deletions(-) diff --git a/cpp/include/cudf/io/detail/parquet.hpp b/cpp/include/cudf/io/detail/parquet.hpp index 0697f8282f4..ced050fbcee 100644 --- a/cpp/include/cudf/io/detail/parquet.hpp +++ b/cpp/include/cudf/io/detail/parquet.hpp @@ -82,7 +82,7 @@ class reader { }; /** - * @brief The reader class that supports chunked reading of a given file. + * @brief The reader class that supports iterative reading of a given file. * * This class intentionally subclasses the `reader` class with private inheritance to hide the * `reader::read()` API. As such, only chunked reading APIs are supported. 
@@ -90,9 +90,22 @@ class reader { class chunked_reader : reader { public: /** - * @brief Constructor from a read limit and an array of data sources with reader options. + * @brief Constructor from a read size limit and an array of data sources with reader options. * - * @param chunk_read_limit The size limit (in bytes) to read each chunk + * The typical usage should be similar to this: + * ``` + * do { + * auto const chunk = reader.read_chunk(); + * // Process chunk + * } while (reader.has_next()); + * + * ``` + * + * If `chunk_read_limit == 0` (i.e., no reading limit), a call to `read_chunk()` will read the + * whole file and return a table containing all rows. + * + * @param chunk_read_limit Limit on total number of bytes to be returned per read, or `0` if there + * is no limit * @param sources Input `datasource` objects to read the dataset from * @param options Settings for controlling reading behavior * @param stream CUDA stream used for device memory operations and kernel launches. diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index 426f6da6209..16a75129151 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -400,11 +400,13 @@ table_with_metadata read_parquet( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief The chunked parquet reader class to handle options and read tables in chunks. + * @brief The chunked parquet reader class to read Parquet file iteratively in to a series of + * tables, chunk by chunk. * - * This class is designed to address the reading issue with parquet files that are very larger such - * that their content columns exceed the size limit in cudf. By reading the file content by chunks - * using this class, each chunk is guaranteed to have column sizes stay within the given limit. + * This class is designed to address the reading issue when reading very large Parquet files such + * that the sizes of their column exceed the limit that can be stored in cudf column. By reading the + * file content by chunks using this class, each chunk is guaranteed to have its sizes stay within + * the given limit. */ class chunked_parquet_reader { public: @@ -418,11 +420,12 @@ class chunked_parquet_reader { /** * @brief Constructor for chunked reader. * - * This constructor accepts the same `parquet_reader_option` parameter as in `read_parquet()`, but - * with an additional parameter to specify the size byte limit of the output table for each - * reading. + * This constructor requires the same `parquet_reader_option` parameter as in + * `cudf::read_parquet()`, and an additional parameter to specify the size byte limit of the + * output table for each reading. * - * @param chunk_read_limit The limit (in bytes) to read each time + * @param chunk_read_limit Limit on total number of bytes to be returned per read, or `0` if there + * is no limit * @param options The options used to read Parquet file * @param mr Device memory resource to use for device memory allocation */ @@ -437,22 +440,22 @@ class chunked_parquet_reader { ~chunked_parquet_reader(); /** - * @brief Check if there is any data of the given file has not yet read. + * @brief Check if there is any data in the given file has not yet read. * * @return A boolean value indicating if there is any data left to read */ bool has_next(); /** - * @brief Read a chunk of Parquet dataset into a set of columns. + * @brief Read a chunk of rows in the given Parquet file. 
* * The sequence of returned tables, if concatenated by their order, guarantees to form a complete * dataset as reading the entire given file at once. * - * An empty table will be returned if the file is empty, or all the data in the given file has + * An empty table will be returned if the given file is empty, or all the data in the file has * been read and returned by the previous calls. * - * @return The output `cudf::table` along with its metadata + * @return An output `cudf::table` along with its metadata */ table_with_metadata read_chunk(); diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index 1b1c729f592..09099533420 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -55,6 +55,9 @@ class reader::impl { /** * @brief Constructor from an array of dataset sources with reader options. * + * By using this constructor, each call to `read()` or `read_chunk()` will perform reading the + * entire given file. + * * @param sources Dataset sources * @param options Settings for controlling reading behavior * @param stream CUDA stream used for device memory operations and kernel launches @@ -84,9 +87,22 @@ class reader::impl { /** * @brief Constructor from a chunk read limit and an array of dataset sources with reader options. * - * By using this constructor, the reader will supports chunked reading with read size limit. + * By using this constructor, the reader will supports iterative (chunked) reading through + * `has_next() ` and `read_chunk()`. For example: + * ``` + * do { + * auto const chunk = reader.read_chunk(); + * // Process chunk + * } while (reader.has_next()); + * + * ``` + * + * Reading the whole given file at once through `read()` function is still supported if + * `chunk_read_limit == 0` (i.e., no reading limit). In such case, `read_chunk()` will also return + * rows of the entire file. * - * @param chunk_read_limit The size limit (in bytes) to read each chunk + * @param chunk_read_limit Limit on total number of bytes to be returned per read, or `0` if there + * is no limit * @param sources Dataset sources * @param options Settings for controlling reading behavior * @param stream CUDA stream used for device memory operations and kernel launches @@ -110,13 +126,13 @@ class reader::impl { private: /** - * @brief Perform the necessary data preprocessing for reading data later on. + * @brief Perform the necessary data preprocessing for reading columns later on. * * @param skip_rows Number of rows to skip from the start - * @param num_rows Number of rows to read + * @param num_rows Number of rows to read, or `-1` to read all rows * @param uses_custom_row_bounds Whether or not num_rows and skip_rows represents user-specific * bounds - * @param row_group_indices Lists of row groups to read, one per source + * @param row_group_indices Lists of row groups to read (one per source), or empty if read all */ void prepare_data(size_type skip_rows, size_type num_rows, @@ -130,7 +146,7 @@ class reader::impl { size_type num_rows); /** - * @brief Compute the reading info (skip_rows, num_rows) for the output chunks. + * @brief Compute the split locations {skip_rows, num_rows} for the output chunks. 
* * There are several pieces of information we can't compute directly from row counts in * the parquet headers when dealing with nested schemas: @@ -145,13 +161,15 @@ class reader::impl { * @param num_rows Maximum number of rows to read * @param uses_custom_row_bounds Whether or not num_rows and skip_rows represents user-specific * bounds + * @param chunk_read_limit Limit on total number of bytes to be returned per read, or `0` if there + * is no limit */ void compute_chunk_read_info(hostdevice_vector& chunks, hostdevice_vector& pages, size_t skip_rows, size_t num_rows, bool uses_custom_row_bounds, - size_t chunked_read_size); + size_t chunk_read_limit); /** * @brief Allocate nesting information storage for all pages and set pointers to it. @@ -173,7 +191,7 @@ class reader::impl { /** * @brief Read a chunk of data and return an output table. * - * This function is called internally and expects all preprocessing steps have been done. + * This function is called internally and expects all preprocessing steps have already been done. * * @param uses_custom_row_bounds Whether or not num_rows and skip_rows represents user-specific * bounds diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index d3b8fb5d5ec..c5b1e9f55b0 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -1010,11 +1010,11 @@ struct row_total_size { * * @param sizes Vector of cumulative {row_count, byte_size} pairs * @param num_rows Total number of rows to read - * @param chunked_read_size Limit on total number of bytes to be returned per read, for all columns + * @param chunk_read_limit Limit on total number of bytes to be returned per read, for all columns */ std::vector find_splits(std::vector const& sizes, size_t num_rows, - size_t chunked_read_size) + size_t chunk_read_limit) { // now we have an array of {row_count, real output bytes}. just walk through it and generate // splits. @@ -1030,12 +1030,12 @@ std::vector find_splits(std::vector c }); auto end = start + sizes.size(); while (cur_row_count < num_rows) { - int64_t p = thrust::lower_bound(thrust::seq, start + cur_pos, end, chunked_read_size) - start; + int64_t p = thrust::lower_bound(thrust::seq, start + cur_pos, end, chunk_read_limit) - start; - // if we're past the end, or if the returned bucket is > than the chunked_read_size, move back + // if we're past the end, or if the returned bucket is > than the chunk_read_limit, move back // one. if (static_cast(p) >= sizes.size() || - (sizes[p].size_bytes - cur_cumulative_size > chunked_read_size)) { + (sizes[p].size_bytes - cur_cumulative_size > chunk_read_limit)) { p--; } @@ -1064,18 +1064,18 @@ std::vector find_splits(std::vector c /** * @brief Given a set of pages that have had their sizes computed by nesting level and * a limit on total read size, generate a set of {skip_rows, num_rows} pairs representing - * a set of reads that will generate output columns of total size <= `chunked_read_size` bytes. + * a set of reads that will generate output columns of total size <= `chunk_read_limit` bytes. 
* * @param pages All pages in the file * @param id Additional intermediate information required to process the pages * @param num_rows Total number of rows to read - * @param chunked_read_size Limit on total number of bytes to be returned per read, for all columns + * @param chunk_read_limit Limit on total number of bytes to be returned per read, for all columns * @param stream CUDA stream to use, default 0 */ std::vector compute_splits(hostdevice_vector& pages, gpu::chunk_intermediate_data const& id, size_t num_rows, - size_t chunked_read_size, + size_t chunk_read_limit, rmm::cuda_stream_view stream) { auto const& page_keys = id.page_keys; @@ -1150,7 +1150,7 @@ std::vector compute_splits(hostdevice_vector 0) { + if (has_lists || chunk_read_limit > 0) { // computes: // PageNestingInfo::num_rows for each page. the true number of rows (taking repetition into // account), not just the number of values. PageNestingInfo::size for each level of nesting, for @@ -1340,8 +1340,8 @@ void reader::impl::compute_chunk_read_info(hostdevice_vector::max(), - true, // compute num_rows - chunked_read_size > 0, // compute string sizes + true, // compute num_rows + chunk_read_limit > 0, // compute string sizes _stream); // computes: @@ -1401,10 +1401,9 @@ void reader::impl::compute_chunk_read_info(hostdevice_vector 0 - ? compute_splits(pages, _chunk_itm_data, num_rows, chunked_read_size, _stream) - : std::vector{{skip_rows, num_rows}}; + _chunk_read_info = chunk_read_limit > 0 + ? compute_splits(pages, _chunk_itm_data, num_rows, chunk_read_limit, _stream) + : std::vector{{skip_rows, num_rows}}; } void reader::impl::allocate_columns(hostdevice_vector& chunks, From fb1bd73381e67e7cd608fe6406034db4a8305d13 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 2 Nov 2022 11:17:52 -0700 Subject: [PATCH 128/162] Cleaning up Signed-off-by: Nghia Truong # Conflicts: # cpp/src/io/parquet/reader_impl_preprocess.cu --- cpp/src/io/parquet/parquet_gpu.hpp | 2 -- cpp/src/io/parquet/reader_impl.hpp | 6 +++--- cpp/src/io/parquet/reader_impl_preprocess.cu | 2 +- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index b7564684e27..8c4a738590c 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -255,7 +255,6 @@ struct ColumnChunkDesc { int32_t src_col_schema; // my schema index in the file }; -// TODO: rename? struct file_intermediate_data { std::vector> raw_page_data; rmm::device_buffer decomp_page_data; @@ -264,7 +263,6 @@ struct file_intermediate_data { hostdevice_vector page_nesting_info{}; }; -// TODO: rename? struct chunk_intermediate_data { rmm::device_uvector page_keys{0, rmm::cuda_stream_default}; rmm::device_uvector page_index{0, rmm::cuda_stream_default}; diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index 09099533420..32b5e23004c 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -126,7 +126,7 @@ class reader::impl { private: /** - * @brief Perform the necessary data preprocessing for reading columns later on. + * @brief Perform the necessary data preprocessing for parsing file later on. 
* * @param skip_rows Number of rows to skip from the start * @param num_rows Number of rows to read, or `-1` to read all rows @@ -137,7 +137,7 @@ class reader::impl { void prepare_data(size_type skip_rows, size_type num_rows, bool uses_custom_row_bounds, - const std::vector>& row_group_indices); + std::vector> const& row_group_indices); /** * @brief Load and decompress the input file(s) into memory. @@ -221,7 +221,7 @@ class reader::impl { * @param uses_custom_row_bounds Whether or not num_rows and skip_rows represents user-specific * bounds */ - void allocate_columns(hostdevice_vector& chunks, + void allocate_columns(hostdevice_vector const& chunks, hostdevice_vector& pages, gpu::chunk_intermediate_data const& id, size_t skip_rows, diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index c5b1e9f55b0..74f6f7105cc 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -1406,7 +1406,7 @@ void reader::impl::compute_chunk_read_info(hostdevice_vector{{skip_rows, num_rows}}; } -void reader::impl::allocate_columns(hostdevice_vector& chunks, +void reader::impl::allocate_columns(hostdevice_vector const& chunks, hostdevice_vector& pages, gpu::chunk_intermediate_data const& id, size_t skip_rows, From 842f9ea3e7be73f02d9745cbef85d54c2a422720 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 2 Nov 2022 11:24:51 -0700 Subject: [PATCH 129/162] Add doxygen Signed-off-by: Nghia Truong --- cpp/src/io/parquet/parquet_gpu.hpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index 8c4a738590c..a8548de8f58 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -255,6 +255,9 @@ struct ColumnChunkDesc { int32_t src_col_schema; // my schema index in the file }; +/** + * @brief Struct to store raw/intermediate file data before parsing. + */ struct file_intermediate_data { std::vector> raw_page_data; rmm::device_buffer decomp_page_data; @@ -263,12 +266,18 @@ struct file_intermediate_data { hostdevice_vector page_nesting_info{}; }; +/** + * @brief Struct to store intermediate page data for parsing each chunk of rows in chunked reading. + */ struct chunk_intermediate_data { rmm::device_uvector page_keys{0, rmm::cuda_stream_default}; rmm::device_uvector page_index{0, rmm::cuda_stream_default}; rmm::device_uvector str_dict_index{0, rmm::cuda_stream_default}; }; +/** + * @brief Structs to identify the reading row range for each chunk of rows in chunked reading. 
+ */ struct chunk_read_info { size_t skip_rows; size_t num_rows; From 34e3777376fad620fb16db679de5e961ce85c57e Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 2 Nov 2022 11:34:26 -0700 Subject: [PATCH 130/162] Clean up `reader_impl.hpp` Signed-off-by: Nghia Truong # Conflicts: # cpp/src/io/parquet/reader_impl.cu # cpp/src/io/parquet/reader_impl.hpp # cpp/src/io/parquet/reader_impl_preprocess.cu --- cpp/src/io/parquet/reader_impl.cu | 37 +++++++------------- cpp/src/io/parquet/reader_impl.hpp | 19 ++-------- cpp/src/io/parquet/reader_impl_preprocess.cu | 10 +++--- 3 files changed, 19 insertions(+), 47 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index cd719c05141..a3488475454 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -52,12 +52,12 @@ void copy_output_buffer(column_buffer const& buff, column_buffer& new_buff) } // namespace -void reader::impl::decode_page_data(hostdevice_vector& chunks, - hostdevice_vector& pages, - hostdevice_vector& page_nesting, - size_t skip_rows, - size_t num_rows) +void reader::impl::decode_page_data(size_t skip_rows, size_t num_rows) { + auto& chunks = _file_itm_data.chunks; + auto& pages = _file_itm_data.pages_info; + auto& page_nesting = _file_itm_data.page_nesting_info; + // TODO (dm): hd_vec should have begin and end iterator members size_t sum_max_depths = std::accumulate(chunks.host_ptr(), @@ -310,25 +310,13 @@ table_with_metadata reader::impl::read_chunk_internal(bool uses_custom_row_bound auto const& read_info = _chunk_read_info[_current_read_chunk++]; - // allocate outgoing columns - allocate_columns(_file_itm_data.chunks, - _file_itm_data.pages_info, - _chunk_itm_data, - read_info.skip_rows, - read_info.num_rows, - uses_custom_row_bounds); - - // printf("read skip_rows = %d, num_rows = %d\n", (int)read_info.skip_rows, - // (int)read_info.num_rows); - - // decoding column data - decode_page_data(_file_itm_data.chunks, - _file_itm_data.pages_info, - _file_itm_data.page_nesting_info, - read_info.skip_rows, - read_info.num_rows); - - // create the final output cudf columns + // Allocate memory buffers for the output columns. + allocate_columns(read_info.skip_rows, read_info.num_rows, uses_custom_row_bounds); + + // Parse data into the output buffers. + decode_page_data(read_info.skip_rows, read_info.num_rows); + + // Create the final output cudf columns. for (size_t i = 0; i < _output_buffers.size(); ++i) { auto const metadata = _reader_column_schema.has_value() ? std::make_optional((*_reader_column_schema)[i]) @@ -342,6 +330,7 @@ table_with_metadata reader::impl::read_chunk_internal(bool uses_custom_row_bound } } + // Add empty columns if needed. return finalize_output(out_metadata, out_columns); } diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index 32b5e23004c..f8b8476ec17 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -213,35 +213,20 @@ class reader::impl { /** * @brief Allocate data bufers for the output columns. 
* - * @param chunks List of column chunk descriptors - * @param pages List of page information - * @param id The chunk intermediate data * @param skip_rows Crop all rows below skip_rows * @param num_rows Maximum number of rows to read * @param uses_custom_row_bounds Whether or not num_rows and skip_rows represents user-specific * bounds */ - void allocate_columns(hostdevice_vector const& chunks, - hostdevice_vector& pages, - gpu::chunk_intermediate_data const& id, - size_t skip_rows, - size_t num_rows, - bool uses_custom_row_bounds); + void allocate_columns(size_t skip_rows, size_t num_rows, bool uses_custom_row_bounds); /** * @brief Converts the page data and outputs to columns. * - * @param chunks List of column chunk descriptors - * @param pages List of page information - * @param page_nesting Page nesting array * @param skip_rows Minimum number of rows from start * @param num_rows Number of rows to output */ - void decode_page_data(hostdevice_vector& chunks, - hostdevice_vector& pages, - hostdevice_vector& page_nesting, - size_t skip_rows, - size_t num_rows); + void decode_page_data(size_t skip_rows, size_t num_rows); private: rmm::cuda_stream_view _stream; diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index 74f6f7105cc..24346912e12 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -1406,13 +1406,11 @@ void reader::impl::compute_chunk_read_info(hostdevice_vector{{skip_rows, num_rows}}; } -void reader::impl::allocate_columns(hostdevice_vector const& chunks, - hostdevice_vector& pages, - gpu::chunk_intermediate_data const& id, - size_t skip_rows, - size_t num_rows, - bool uses_custom_row_bounds) +void reader::impl::allocate_columns(size_t skip_rows, size_t num_rows, bool uses_custom_row_bounds) { + auto const& chunks = _file_itm_data.chunks; + auto& pages = _file_itm_data.pages_info; + // computes: // PageNestingInfo::size for each level of nesting, for each page, taking row bounds into account. 
// PageInfo::skipped_values, which tells us where to start decoding in the input to respect the From 40e463ca7a47339749feaf75891c28f560fede69 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 2 Nov 2022 11:48:22 -0700 Subject: [PATCH 131/162] More cleanup Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.cu | 8 +- cpp/src/io/parquet/reader_impl.hpp | 6 +- cpp/src/io/parquet/reader_impl_helpers.cu | 2 +- cpp/src/io/parquet/reader_impl_helpers.cuh | 5 +- cpp/src/io/parquet/reader_impl_preprocess.cu | 105 ++++++++++--------- 5 files changed, 64 insertions(+), 62 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index a3488475454..b69331f2157 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -279,12 +279,8 @@ void reader::impl::prepare_data(size_type skip_rows, if (num_rows_corrected > 0 && row_groups_info.size() != 0 && _input_columns.size() != 0) { load_and_decompress_data(row_groups_info, num_rows_corrected); - compute_chunk_read_info(_file_itm_data.chunks, - _file_itm_data.pages_info, - skip_rows_corrected, - num_rows_corrected, - uses_custom_row_bounds, - _chunk_read_limit); + compute_chunk_read_info( + skip_rows_corrected, num_rows_corrected, uses_custom_row_bounds, _chunk_read_limit); if (_chunk_read_limit == 0) { // read the whole file at once CUDF_EXPECTS(_chunk_read_info.size() == 1, diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index f8b8476ec17..5579ace6543 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -155,8 +155,6 @@ class reader::impl { * * For flat schemas, these values are computed during header decoding (see gpuDecodePageHeaders). * - * @param chunks All chunks to be decoded - * @param pages All pages to be decoded * @param skip_rows Crop all rows below skip_rows * @param num_rows Maximum number of rows to read * @param uses_custom_row_bounds Whether or not num_rows and skip_rows represents user-specific @@ -164,9 +162,7 @@ class reader::impl { * @param chunk_read_limit Limit on total number of bytes to be returned per read, or `0` if there * is no limit */ - void compute_chunk_read_info(hostdevice_vector& chunks, - hostdevice_vector& pages, - size_t skip_rows, + void compute_chunk_read_info(size_t skip_rows, size_t num_rows, bool uses_custom_row_bounds, size_t chunk_read_limit); diff --git a/cpp/src/io/parquet/reader_impl_helpers.cu b/cpp/src/io/parquet/reader_impl_helpers.cu index a9199df6651..f08ff455664 100644 --- a/cpp/src/io/parquet/reader_impl_helpers.cu +++ b/cpp/src/io/parquet/reader_impl_helpers.cu @@ -196,7 +196,7 @@ std::vector aggregate_reader_metadata::metadatas_from_sources( } std::vector> -aggregate_reader_metadata::collect_keyval_metadata() +aggregate_reader_metadata::collect_keyval_metadata() const { std::vector> kv_maps; std::transform(per_file_metadata.cbegin(), diff --git a/cpp/src/io/parquet/reader_impl_helpers.cuh b/cpp/src/io/parquet/reader_impl_helpers.cuh index 54650046779..ec27d7cf1dd 100644 --- a/cpp/src/io/parquet/reader_impl_helpers.cuh +++ b/cpp/src/io/parquet/reader_impl_helpers.cuh @@ -79,13 +79,14 @@ class aggregate_reader_metadata { /** * @brief Create a metadata object from each element in the source vector */ - std::vector metadatas_from_sources( + static std::vector metadatas_from_sources( std::vector> const& sources); /** * @brief Collect the keyvalue maps from each per-file metadata object into a vector of maps. 
*/ - [[nodiscard]] std::vector> collect_keyval_metadata(); + [[nodiscard]] std::vector> collect_keyval_metadata() + const; /** * @brief Sums up the number of rows of each source diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index 24346912e12..a6a94dfb9cb 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -170,7 +170,7 @@ void generate_depth_remappings(std::map, std::ve } /** - * @brief Function that returns the required the number of bits to store a value + * @brief Function that returns the required the number of bits to store a value. */ template T required_bits(uint32_t max_level) @@ -210,16 +210,18 @@ std::tuple conversion_info(type_id column_type_id, } /** - * TODO: Rename this into something more meaningful - * - * @brief Reads compressed page data to device memory + * @brief Reads compressed page data to device memory. * + * @param sources Dataset sources * @param page_data Buffers to hold compressed page data for each chunk * @param chunks List of column chunk descriptors * @param begin_chunk Index of first column chunk to read * @param end_chunk Index after the last column chunk to read * @param column_chunk_offsets File offset for all chunks + * @param chunk_source_map Association between each column chunk and its source + * @param stream CUDA stream used for device memory operations and kernel launches * + * @return A future object for reading synchronization */ std::future read_column_chunks_async( std::vector> const& sources, @@ -282,9 +284,10 @@ std::future read_column_chunks_async( } /** - * @brief Returns the number of total pages from the given column chunks + * @brief Return the number of total pages from the given column chunks. * * @param chunks List of column chunk descriptors + * @param stream CUDA stream used for device memory operations and kernel launches * * @return The total number of pages */ @@ -305,10 +308,11 @@ size_t count_page_headers(hostdevice_vector& chunks, } /** - * @brief Returns the page information from the given column chunks. + * @brief Decode the page information from the given column chunks. * * @param chunks List of column chunk descriptors * @param pages List of page information + * @param stream CUDA stream used for device memory operations and kernel launches */ void decode_page_headers(hostdevice_vector& chunks, hostdevice_vector& pages, @@ -332,6 +336,7 @@ void decode_page_headers(hostdevice_vector& chunks, * * @param chunks List of column chunk descriptors * @param pages List of page information + * @param stream CUDA stream used for device memory operations and kernel launches * * @return Device buffer to decompressed page data */ @@ -639,16 +644,22 @@ void reader::impl::load_and_decompress_data(std::vector const& r // This function should never be called if `num_rows == 0`. 
CUDF_EXPECTS(num_rows > 0, "Number of reading rows must not be zero."); + auto& raw_page_data = _file_itm_data.raw_page_data; + auto& decomp_page_data = _file_itm_data.decomp_page_data; + auto& chunks = _file_itm_data.chunks; + auto& pages_info = _file_itm_data.pages_info; + auto& page_nesting_info = _file_itm_data.page_nesting_info; + // Descriptors for all the chunks that make up the selected columns const auto num_input_columns = _input_columns.size(); const auto num_chunks = row_groups_info.size() * num_input_columns; - _file_itm_data.chunks = hostdevice_vector(0, num_chunks, _stream); + chunks = hostdevice_vector(0, num_chunks, _stream); // Association between each column chunk and its source std::vector chunk_source_map(num_chunks); // Tracker for eventually deallocating compressed and uncompressed data - _file_itm_data.raw_page_data = std::vector>(num_chunks); + raw_page_data = std::vector>(num_chunks); // Keep track of column chunk file offsets std::vector column_chunk_offsets(num_chunks); @@ -662,7 +673,7 @@ void reader::impl::load_and_decompress_data(std::vector const& r auto const row_group_start = rg.start_row; auto const row_group_source = rg.source_index; auto const row_group_rows = std::min(remaining_rows, row_group.num_rows); - auto const io_chunk_idx = _file_itm_data.chunks.size(); + auto const io_chunk_idx = chunks.size(); // generate ColumnChunkDesc objects for everything to be decoded (all input columns) for (size_t i = 0; i < num_input_columns; ++i) { @@ -678,34 +689,33 @@ void reader::impl::load_and_decompress_data(std::vector const& r schema.converted_type, schema.type_length); - column_chunk_offsets[_file_itm_data.chunks.size()] = + column_chunk_offsets[chunks.size()] = (col_meta.dictionary_page_offset != 0) ? std::min(col_meta.data_page_offset, col_meta.dictionary_page_offset) : col_meta.data_page_offset; - _file_itm_data.chunks.push_back( - gpu::ColumnChunkDesc(col_meta.total_compressed_size, - nullptr, - col_meta.num_values, - schema.type, - type_width, - row_group_start, - row_group_rows, - schema.max_definition_level, - schema.max_repetition_level, - _metadata->get_output_nesting_depth(col.schema_idx), - required_bits(schema.max_definition_level), - required_bits(schema.max_repetition_level), - col_meta.codec, - converted_type, - schema.logical_type, - schema.decimal_scale, - clock_rate, - i, - col.schema_idx)); + chunks.push_back(gpu::ColumnChunkDesc(col_meta.total_compressed_size, + nullptr, + col_meta.num_values, + schema.type, + type_width, + row_group_start, + row_group_rows, + schema.max_definition_level, + schema.max_repetition_level, + _metadata->get_output_nesting_depth(col.schema_idx), + required_bits(schema.max_definition_level), + required_bits(schema.max_repetition_level), + col_meta.codec, + converted_type, + schema.logical_type, + schema.decimal_scale, + clock_rate, + i, + col.schema_idx)); // Map each column chunk to its column index and its source index - chunk_source_map[_file_itm_data.chunks.size() - 1] = row_group_source; + chunk_source_map[chunks.size() - 1] = row_group_source; if (col_meta.codec != Compression::UNCOMPRESSED) { total_decompressed_size += col_meta.total_uncompressed_size; @@ -713,10 +723,10 @@ void reader::impl::load_and_decompress_data(std::vector const& r } // Read compressed chunk data to device memory read_rowgroup_tasks.push_back(read_column_chunks_async(_sources, - _file_itm_data.raw_page_data, - _file_itm_data.chunks, + raw_page_data, + chunks, io_chunk_idx, - _file_itm_data.chunks.size(), + chunks.size(), 
column_chunk_offsets, chunk_source_map, _stream)); @@ -729,19 +739,18 @@ void reader::impl::load_and_decompress_data(std::vector const& r assert(remaining_rows <= 0); // Process dataset chunk pages into output columns - const auto total_pages = count_page_headers(_file_itm_data.chunks, _stream); - _file_itm_data.pages_info = hostdevice_vector(total_pages, total_pages, _stream); + const auto total_pages = count_page_headers(chunks, _stream); + pages_info = hostdevice_vector(total_pages, total_pages, _stream); if (total_pages > 0) { // decoding of column/page information - decode_page_headers(_file_itm_data.chunks, _file_itm_data.pages_info, _stream); + decode_page_headers(chunks, pages_info, _stream); if (total_decompressed_size > 0) { - _file_itm_data.decomp_page_data = - decompress_page_data(_file_itm_data.chunks, _file_itm_data.pages_info, _stream); + decomp_page_data = decompress_page_data(chunks, pages_info, _stream); // Free compressed data - for (size_t c = 0; c < _file_itm_data.chunks.size(); c++) { - if (_file_itm_data.chunks[c].codec != parquet::Compression::UNCOMPRESSED) { - _file_itm_data.raw_page_data[c].reset(); + for (size_t c = 0; c < chunks.size(); c++) { + if (chunks[c].codec != parquet::Compression::UNCOMPRESSED) { + raw_page_data[c].reset(); // TODO: Check if this is called } } @@ -765,8 +774,7 @@ void reader::impl::load_and_decompress_data(std::vector const& r // nesting information (sizes, etc) stored -per page- // note : even for flat schemas, we allocate 1 level of "nesting" info - allocate_nesting_info( - _file_itm_data.chunks, _file_itm_data.pages_info, _file_itm_data.page_nesting_info); + allocate_nesting_info(chunks, pages_info, page_nesting_info); } } @@ -1184,7 +1192,7 @@ struct get_page_nesting_size { }; /** - * @brief Writes to the chunk_row field of the PageInfo struct + * @brief Writes to the chunk_row field of the PageInfo struct. */ struct chunk_row_output_iter { gpu::PageInfo* p; @@ -1254,13 +1262,14 @@ struct start_offset_output_iterator { } // anonymous namespace -void reader::impl::compute_chunk_read_info(hostdevice_vector& chunks, - hostdevice_vector& pages, - size_t skip_rows, +void reader::impl::compute_chunk_read_info(size_t skip_rows, size_t num_rows, bool uses_custom_row_bounds, size_t chunk_read_limit) { + auto& chunks = _file_itm_data.chunks; + auto& pages = _file_itm_data.pages_info; + // iterate over all input columns and determine if they contain lists so we can further // preprocess them. bool has_lists = false; From 7252b0d72b7a414984978115fb0d329ebbf66377 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 2 Nov 2022 12:01:18 -0700 Subject: [PATCH 132/162] Cleanup `allocate_nesting_info` Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.hpp | 8 +------- cpp/src/io/parquet/reader_impl_preprocess.cu | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 17 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index 5579ace6543..0c24cb58ff2 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -175,14 +175,8 @@ class reader::impl { * * Note that this gets called even in the flat schema case so that we have a * consistent place to store common information such as value counts, etc. - * - * @param chunks List of column chunk descriptors - * @param pages List of page information - * @param page_nesting_info The allocated nesting info structs. 
*/ - void allocate_nesting_info(hostdevice_vector const& chunks, - hostdevice_vector& pages, - hostdevice_vector& page_nesting_info); + void allocate_nesting_info(); /** * @brief Read a chunk of data and return an output table. diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index a6a94dfb9cb..94afe19ca69 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -520,10 +520,12 @@ rmm::device_buffer decompress_page_data(hostdevice_vector& } // namespace -void reader::impl::allocate_nesting_info(hostdevice_vector const& chunks, - hostdevice_vector& pages, - hostdevice_vector& page_nesting_info) +void reader::impl::allocate_nesting_info() { + auto const& chunks = _file_itm_data.chunks; + auto& pages = _file_itm_data.pages_info; + auto& page_nesting_info = _file_itm_data.page_nesting_info; + // compute total # of page_nesting infos needed and allocate space. doing this in one // buffer to keep it to a single gpu allocation size_t const total_page_nesting_infos = std::accumulate( @@ -644,11 +646,10 @@ void reader::impl::load_and_decompress_data(std::vector const& r // This function should never be called if `num_rows == 0`. CUDF_EXPECTS(num_rows > 0, "Number of reading rows must not be zero."); - auto& raw_page_data = _file_itm_data.raw_page_data; - auto& decomp_page_data = _file_itm_data.decomp_page_data; - auto& chunks = _file_itm_data.chunks; - auto& pages_info = _file_itm_data.pages_info; - auto& page_nesting_info = _file_itm_data.page_nesting_info; + auto& raw_page_data = _file_itm_data.raw_page_data; + auto& decomp_page_data = _file_itm_data.decomp_page_data; + auto& chunks = _file_itm_data.chunks; + auto& pages_info = _file_itm_data.pages_info; // Descriptors for all the chunks that make up the selected columns const auto num_input_columns = _input_columns.size(); @@ -773,8 +774,7 @@ void reader::impl::load_and_decompress_data(std::vector const& r // nesting information (sizes, etc) stored -per page- // note : even for flat schemas, we allocate 1 level of "nesting" info - - allocate_nesting_info(chunks, pages_info, page_nesting_info); + allocate_nesting_info(); } } From 696182cf2d60f59e8865f98d6b20a586104db1dc Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 2 Nov 2022 13:37:15 -0700 Subject: [PATCH 133/162] Reformat Signed-off-by: Nghia Truong --- cpp/include/cudf/io/detail/parquet.hpp | 4 ++-- cpp/include/cudf/io/parquet.hpp | 4 ++-- cpp/src/io/parquet/reader_impl.hpp | 12 ++++++------ 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/cpp/include/cudf/io/detail/parquet.hpp b/cpp/include/cudf/io/detail/parquet.hpp index ced050fbcee..31b7c71b769 100644 --- a/cpp/include/cudf/io/detail/parquet.hpp +++ b/cpp/include/cudf/io/detail/parquet.hpp @@ -104,8 +104,8 @@ class chunked_reader : reader { * If `chunk_read_limit == 0` (i.e., no reading limit), a call to `read_chunk()` will read the * whole file and return a table containing all rows. * - * @param chunk_read_limit Limit on total number of bytes to be returned per read, or `0` if there - * is no limit + * @param chunk_read_limit Limit on total number of bytes to be returned per read, + * or `0` if there is no limit * @param sources Input `datasource` objects to read the dataset from * @param options Settings for controlling reading behavior * @param stream CUDA stream used for device memory operations and kernel launches. 
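At the usage level, the limit documented here is supplied once at construction time and the caller simply loops. A minimal sketch, assuming the public `cudf::io::chunked_parquet_reader` (documented in the parquet.hpp hunk that follows) exposes `has_next()`/`read_chunk()` as the Java bindings' `hasNext()`/`readChunk()` later in this series suggest:

#include <cudf/io/parquet.hpp>

#include <string>

// Minimal sketch: read a parquet file in chunks of at most ~512MB of output each.
// Passing 0 as the limit would instead read the whole file in a single chunk.
void read_in_chunks(std::string const& filepath)
{
  auto const options =
    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}).build();

  cudf::io::chunked_parquet_reader reader(512 * 1024 * 1024, options);

  while (reader.has_next()) {
    auto chunk = reader.read_chunk();  // cudf::io::table_with_metadata
    // ... hand chunk.tbl off for processing ...
  }
}

Note that the Java wrapper later in this series additionally forces its `hasNext()` to return true at least once, so that a zero-row input still surfaces one table with empty columns.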
diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index 16a75129151..b866fa00a55 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -424,8 +424,8 @@ class chunked_parquet_reader { * `cudf::read_parquet()`, and an additional parameter to specify the size byte limit of the * output table for each reading. * - * @param chunk_read_limit Limit on total number of bytes to be returned per read, or `0` if there - * is no limit + * @param chunk_read_limit Limit on total number of bytes to be returned per read, + * or `0` if there is no limit * @param options The options used to read Parquet file * @param mr Device memory resource to use for device memory allocation */ diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index 0c24cb58ff2..4667e6051a5 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -98,11 +98,11 @@ class reader::impl { * ``` * * Reading the whole given file at once through `read()` function is still supported if - * `chunk_read_limit == 0` (i.e., no reading limit). In such case, `read_chunk()` will also return - * rows of the entire file. + * `chunk_read_limit == 0` (i.e., no reading limit). + * In such case, `read_chunk()` will also return rows of the entire file. * - * @param chunk_read_limit Limit on total number of bytes to be returned per read, or `0` if there - * is no limit + * @param chunk_read_limit Limit on total number of bytes to be returned per read, + * or `0` if there is no limit * @param sources Dataset sources * @param options Settings for controlling reading behavior * @param stream CUDA stream used for device memory operations and kernel launches @@ -159,8 +159,8 @@ class reader::impl { * @param num_rows Maximum number of rows to read * @param uses_custom_row_bounds Whether or not num_rows and skip_rows represents user-specific * bounds - * @param chunk_read_limit Limit on total number of bytes to be returned per read, or `0` if there - * is no limit + * @param chunk_read_limit Limit on total number of bytes to be returned per read, + * or `0` if there is no limit */ void compute_chunk_read_info(size_t skip_rows, size_t num_rows, From 4c353fd421181ced08820f85786dc3383f31935e Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 2 Nov 2022 13:50:52 -0700 Subject: [PATCH 134/162] Further cleanup Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader.cu | 7 ++----- cpp/src/io/parquet/reader_impl.cu | 10 ---------- cpp/src/io/parquet/reader_impl.hpp | 6 ++---- cpp/src/io/parquet/reader_impl_helpers.cu | 2 -- cpp/src/io/parquet/reader_impl_helpers.cuh | 5 +---- cpp/src/io/parquet/reader_impl_preprocess.cu | 3 +-- 6 files changed, 6 insertions(+), 27 deletions(-) diff --git a/cpp/src/io/parquet/reader.cu b/cpp/src/io/parquet/reader.cu index 145a941ae47..de79d93cfb3 100644 --- a/cpp/src/io/parquet/reader.cu +++ b/cpp/src/io/parquet/reader.cu @@ -15,15 +15,12 @@ */ #include "reader_impl.hpp" -#include "reader_impl_helpers.cuh" - -#include namespace cudf::io::detail::parquet { reader::reader() = default; -reader::reader(std::vector>&& sources, +reader::reader(std::vector>&& sources, parquet_reader_options const& options, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -44,7 +41,7 @@ table_with_metadata reader::read(parquet_reader_options const& options) } chunked_reader::chunked_reader(std::size_t chunk_read_limit, - std::vector>&& sources, + std::vector>&& sources, parquet_reader_options const& options, 
rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index b69331f2157..127594c7403 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -18,16 +18,6 @@ #include -#include -#include -#include -#include - -#include -#include -#include -#include - namespace cudf::io::detail::parquet { namespace { diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index 4667e6051a5..79877ad3944 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -21,22 +21,20 @@ #pragma once -#include "parquet.hpp" #include "parquet_gpu.hpp" #include "reader_impl_helpers.cuh" #include -#include #include #include #include #include +#include #include -#include -#include +#include #include namespace cudf::io::detail::parquet { diff --git a/cpp/src/io/parquet/reader_impl_helpers.cu b/cpp/src/io/parquet/reader_impl_helpers.cu index f08ff455664..2a364e92bbf 100644 --- a/cpp/src/io/parquet/reader_impl_helpers.cu +++ b/cpp/src/io/parquet/reader_impl_helpers.cu @@ -16,8 +16,6 @@ #include "reader_impl_helpers.cuh" -#include - #include namespace cudf::io::detail::parquet { diff --git a/cpp/src/io/parquet/reader_impl_helpers.cuh b/cpp/src/io/parquet/reader_impl_helpers.cuh index ec27d7cf1dd..4e8bd267bae 100644 --- a/cpp/src/io/parquet/reader_impl_helpers.cuh +++ b/cpp/src/io/parquet/reader_impl_helpers.cuh @@ -19,12 +19,9 @@ #include "compact_protocol_reader.hpp" #include "parquet_gpu.hpp" -#include - #include +#include #include -#include -#include #include #include diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index 94afe19ca69..5346b8bf185 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -15,7 +15,6 @@ */ #include "reader_impl.hpp" -#include "reader_impl_helpers.cuh" #include #include @@ -25,7 +24,6 @@ #include #include -#include #include #include @@ -38,6 +36,7 @@ #include #include #include +#include #include #include #include From 31590cb589fda56d302273e31e743e7f4df29d4e Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 3 Nov 2022 11:47:09 -0700 Subject: [PATCH 135/162] Rename `compute_chunk_read_info` into `preprocess_pages` Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.cu | 3 +-- cpp/src/io/parquet/reader_impl.hpp | 11 ++++++----- cpp/src/io/parquet/reader_impl_preprocess.cu | 8 ++++---- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 127594c7403..2ca87736f38 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -268,8 +268,7 @@ void reader::impl::prepare_data(size_type skip_rows, if (num_rows_corrected > 0 && row_groups_info.size() != 0 && _input_columns.size() != 0) { load_and_decompress_data(row_groups_info, num_rows_corrected); - - compute_chunk_read_info( + preprocess_pages( skip_rows_corrected, num_rows_corrected, uses_custom_row_bounds, _chunk_read_limit); if (_chunk_read_limit == 0) { // read the whole file at once diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index 79877ad3944..7609e804871 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -144,7 +144,8 @@ class reader::impl { size_type num_rows); /** - * @brief Compute the split locations {skip_rows, num_rows} for the 
output chunks. + * @brief Perform some preprocessing for page data and also compute the split locations + * {skip_rows, num_rows} for chunked reading. * * There are several pieces of information we can't compute directly from row counts in * the parquet headers when dealing with nested schemas: @@ -160,10 +161,10 @@ class reader::impl { * @param chunk_read_limit Limit on total number of bytes to be returned per read, * or `0` if there is no limit */ - void compute_chunk_read_info(size_t skip_rows, - size_t num_rows, - bool uses_custom_row_bounds, - size_t chunk_read_limit); + void preprocess_pages(size_t skip_rows, + size_t num_rows, + bool uses_custom_row_bounds, + size_t chunk_read_limit); /** * @brief Allocate nesting information storage for all pages and set pointers to it. diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index 5346b8bf185..03021e0ad28 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -1261,10 +1261,10 @@ struct start_offset_output_iterator { } // anonymous namespace -void reader::impl::compute_chunk_read_info(size_t skip_rows, - size_t num_rows, - bool uses_custom_row_bounds, - size_t chunk_read_limit) +void reader::impl::preprocess_pages(size_t skip_rows, + size_t num_rows, + bool uses_custom_row_bounds, + size_t chunk_read_limit) { auto& chunks = _file_itm_data.chunks; auto& pages = _file_itm_data.pages_info; From 8671ed63027e7ba20f33c3b6d492a19df4338f8f Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 4 Nov 2022 08:00:55 -0700 Subject: [PATCH 136/162] Make `hasNext` return `true` at least once Signed-off-by: Nghia Truong --- .../ai/rapids/cudf/ParquetChunkedReader.java | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java b/java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java index 6e93aa8e4e4..280f7b8529e 100644 --- a/java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java +++ b/java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java @@ -31,7 +31,8 @@ public class ParquetChunkedReader implements AutoCloseable { /** * Construct the reader instance from a read limit and a file path. * - * @param chunkSizeByteLimit Limit on total number of bytes to be returned per read. + * @param chunkSizeByteLimit Limit on total number of bytes to be returned per read, + * or 0 if there is no limit. * @param filePath Full path of the input Parquet file to read. */ public ParquetChunkedReader(long chunkSizeByteLimit, File filePath) { @@ -41,7 +42,8 @@ public ParquetChunkedReader(long chunkSizeByteLimit, File filePath) { /** * Construct the reader instance from a read limit, a ParquetOptions object, and a file path. * - * @param chunkSizeByteLimit Limit on total number of bytes to be returned per read. + * @param chunkSizeByteLimit Limit on total number of bytes to be returned per read, + * or 0 if there is no limit. * @param opts The options for Parquet reading. * @param filePath Full path of the input Parquet file to read. */ @@ -53,7 +55,8 @@ public ParquetChunkedReader(long chunkSizeByteLimit, ParquetOptions opts, File f /** * Construct the reader instance from a read limit and a file already read in a memory buffer. * - * @param chunkSizeByteLimit Limit on total number of bytes to be returned per read. + * @param chunkSizeByteLimit Limit on total number of bytes to be returned per read, + * or 0 if there is no limit. 
* @param opts The options for Parquet reading. * @param buffer Raw Parquet file content. * @param offset The starting offset into buffer. @@ -71,6 +74,13 @@ public ParquetChunkedReader(long chunkSizeByteLimit, ParquetOptions opts, HostMe * @return A boolean value indicating if there is more data to read from file. */ public boolean hasNext() { + if (firstCall) { + // This function needs to return true at least once, so an empty table + // (but having empty columns instead of no column) can be returned by readChunk() + // if the input file has no row. + firstCall = false; + return true; + } return hasNext(handle); } @@ -101,10 +111,16 @@ public void close() { private long handle; + /** + * Auxiliary variable to help {@link #hasNext()} returning true at least once. + */ + private boolean firstCall = true; + /** * Create a native chunked Parquet reader object on heap and return its memory address. * - * @param chunkSizeByteLimit Limit on total number of bytes to be returned per read. + * @param chunkSizeByteLimit Limit on total number of bytes to be returned per read, + * or 0 if there is no limit. * @param filterColumnNames Name of the columns to read, or an empty array if we want to read all. * @param binaryToString Whether to convert the corresponding column to String if it is binary. * @param filePath Full path of the file to read, or given as null if reading from a buffer. From 8ba10b878c6e1743219dd79a4f44cdf025b4ed45 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 4 Nov 2022 08:03:32 -0700 Subject: [PATCH 137/162] Add doxygen for `handle` variable Signed-off-by: Nghia Truong --- .../main/java/ai/rapids/cudf/ParquetChunkedReader.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java b/java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java index 280f7b8529e..360532342b7 100644 --- a/java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java +++ b/java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java @@ -109,13 +109,17 @@ public void close() { } - private long handle; - /** * Auxiliary variable to help {@link #hasNext()} returning true at least once. */ private boolean firstCall = true; + /** + * Handle for memory address of the native Parquet chunked reader class. + */ + private long handle; + + /** * Create a native chunked Parquet reader object on heap and return its memory address. * From d91c690060784b08317a0d08cd9cab1ea23f84f3 Mon Sep 17 00:00:00 2001 From: Dave Baranec Date: Thu, 10 Nov 2022 15:48:33 -0600 Subject: [PATCH 138/162] Re-adding an optimization that somehow got nuked during a merge. 
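The hunks below replace the single per-page nesting `size` with a `size` computed once in a base pass plus a per-pass `batch_size`, so that a trim pass only re-walks the level data for pages that straddle the requested bounds. A simplified host-side illustration of that row-intersection idea (hypothetical types and names; the real kernel operates on repetition/definition levels, not flat row counts):

#include <algorithm>
#include <cstddef>

// Hypothetical flat view of a page, for illustration only.
struct page_rows {
  std::size_t start_row;  // absolute row index of the page's first row
  std::size_t num_rows;   // total rows in the page, known from the base pass
};

// How many of the page's rows fall inside [skip_rows, skip_rows + num_rows)?
// - result == p.num_rows : page is fully inside the bounds, its precomputed size is reused
// - result == 0          : page is fully trimmed away, its batch size is zero
// - otherwise            : page straddles a bound and needs the expensive recount
std::size_t rows_in_bounds(page_rows const& p, std::size_t skip_rows, std::size_t num_rows)
{
  auto const page_begin = p.start_row;
  auto const page_end   = p.start_row + p.num_rows;
  auto const lo         = std::max(page_begin, skip_rows);
  auto const hi         = std::min(page_end, skip_rows + num_rows);
  return hi > lo ? hi - lo : 0;
}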
--- cpp/src/io/parquet/page_data.cu | 113 ++++++++++++------- cpp/src/io/parquet/reader_impl_preprocess.cu | 15 ++- 2 files changed, 80 insertions(+), 48 deletions(-) diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index 56351d7d45b..38627761a01 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -1464,9 +1464,6 @@ static __device__ void gpuUpdatePageSizes(page_state_s* s, // how many rows we've processed in the page so far int input_row_count = s->input_row_count; - // how many valid leaves we've processed - // int input_leaf_valid_count = s->input_leaf_valid_count; - while (input_value_count < target_input_value_count) { int start_depth, end_depth, d; get_nesting_bounds( @@ -1512,15 +1509,7 @@ static __device__ void gpuUpdatePageSizes(page_state_s* s, int const in_nesting_bounds = (s_idx >= start_depth && s_idx <= end_depth && in_row_bounds) ? 1 : 0; uint32_t const count_mask = ballot(in_nesting_bounds); - if (!t) { pni->size += __popc(count_mask); } - - /* - if (s_idx == max_depth - 1) { - bool const is_valid = is_new_leaf && in_nesting_bounds; - uint32_t const warp_leaf_valid_mask = ballot(is_valid); - input_leaf_valid_count += __popc(warp_leaf_valid_mask); - } - */ + if (!t) { pni->batch_size += __popc(count_mask); } } input_value_count += min(32, (target_input_value_count - input_value_count)); @@ -1570,7 +1559,7 @@ __global__ void __launch_bounds__(block_size) device_span chunks, size_t min_row, size_t num_rows, - bool compute_num_rows_pass, + bool base_pass, bool compute_string_sizes) { __shared__ __align__(16) page_state_s state_g; @@ -1582,51 +1571,78 @@ __global__ void __launch_bounds__(block_size) if (!setupLocalPageInfo(s, pp, chunks, min_row, num_rows, false)) { return; } + if (!t) { + s->page.skipped_values = -1; + s->page.skipped_leaf_values = 0; + s->page.str_bytes = 0; + s->input_row_count = 0; + s->input_value_count = 0; + + // in the base pass, we're computing the number of rows, make sure we visit absolutely + // everything + if (base_pass) { + s->first_row = 0; + s->num_rows = INT_MAX; + s->row_index_lower_bound = -1; + } + } + // we only need to preprocess hierarchies with repetition in them (ie, hierarchies // containing lists anywhere within). bool const has_repetition = chunks[pp->chunk_idx].max_level[level_type::REPETITION] > 0; compute_string_sizes = compute_string_sizes && ((s->col.data_type & 7) == BYTE_ARRAY && s->dtype_len != 4); - // reasons we might want to early out: + // various early out optimizations: + // - if this is a flat hierarchy (no lists) and is not a string column. in this case we don't need // to do // the expensive work of traversing the level data to determine sizes. we can just compute it // directly. - // - if this is the trim pass and we have no rows to output for this page. if (!has_repetition && !compute_string_sizes) { - if (!t) { - // note: doing this for all nesting levels because we can still have structs even if we don't - // have lists. 
- for (size_type idx = 0; idx < pp->num_nesting_levels; idx++) { - pp->nesting[idx].size = pp->num_input_values; + int d = 0; + while (d < s->page.num_nesting_levels) { + auto const i = d + t; + if (i < s->page.num_nesting_levels) { + if (base_pass) { pp->nesting[i].size = pp->num_input_values; } + pp->nesting[i].batch_size = pp->num_input_values; } + d += blockDim.x; + } + return; + } + + // - if this page is not at the beginning or end of the trim bounds, the batch size is + // the full page size + if (!base_pass && s->num_rows == s->page.num_rows) { + int d = 0; + while (d < s->page.num_nesting_levels) { + auto const i = d + t; + if (i < s->page.num_nesting_levels) { pp->nesting[i].batch_size = pp->nesting[i].size; } + d += blockDim.x; } return; } + // - if this page is completely trimmed, zero out sizes. + if (!base_pass && s->num_rows == 0) { + int d = 0; + while (d < s->page.num_nesting_levels) { + auto const i = d + t; + if (i < s->page.num_nesting_levels) { pp->nesting[i].batch_size = 0; } + d += blockDim.x; + } + return; + } + + // at this point we are going to be fully recomputing batch information + // zero sizes int d = 0; while (d < s->page.num_nesting_levels) { - if (d + t < s->page.num_nesting_levels) { s->page.nesting[d + t].size = 0; } + if (d + t < s->page.num_nesting_levels) { s->page.nesting[d + t].batch_size = 0; } d += blockDim.x; } - if (!t) { - s->page.skipped_values = -1; - s->page.skipped_leaf_values = 0; - s->page.str_bytes = 0; - s->input_row_count = 0; - s->input_value_count = 0; - - // if we're computing the number of rows, make sure we visit absolutely everything - if (compute_num_rows_pass) { - s->first_row = 0; - s->num_rows = INT_MAX; - s->row_index_lower_bound = -1; - } - } - // if we have no work to do for this page. 
- if (!compute_num_rows_pass && s->num_rows == 0) { return; } __syncthreads(); // optimization : it might be useful to have a version of gpuDecodeStream that could go wider than @@ -1651,7 +1667,7 @@ __global__ void __launch_bounds__(block_size) : s->lvl_count[level_type::DEFINITION]; // process what we got back - gpuUpdatePageSizes(s, actual_input_count, t, !compute_num_rows_pass); + gpuUpdatePageSizes(s, actual_input_count, t, !base_pass); if (compute_string_sizes) { auto const str_len = gpuGetStringSizes(s, s->input_leaf_count, t); if (!t) { s->page.str_bytes += str_len; } @@ -1661,9 +1677,26 @@ __global__ void __launch_bounds__(block_size) __syncwarp(); } } - // update # rows in the actual page + + // update output results: + // - real number of rows for the whole page + // - nesting sizes for the whole page + // - skipped value information for trimmed pages + // - string bytes + if (base_pass) { + // nesting level 0 is the root column, so the size is also the # of rows + if (!t) { pp->num_rows = s->page.nesting[0].batch_size; } + + // store off this batch size as the "full" size + int d = 0; + while (d < s->page.num_nesting_levels) { + auto const i = d + t; + if (i < s->page.num_nesting_levels) { pp->nesting[i].size = pp->nesting[i].batch_size; } + d += blockDim.x; + } + } + if (!t) { - if (compute_num_rows_pass) { pp->num_rows = s->page.nesting[0].size; } pp->skipped_values = s->page.skipped_values; pp->skipped_leaf_values = s->page.skipped_leaf_values; pp->str_bytes = s->page.str_bytes; diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index 5346b8bf185..f7174225699 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -895,7 +895,7 @@ struct cumulative_row_sum { * @brief Functor which computes the total data size for a given type of cudf column. * * In the case of strings, the return size does not include the chars themselves. That - * information is tracked seperately (see PageInfo::str_bytes). + * information is tracked separately (see PageInfo::str_bytes). */ struct row_size_functor { __device__ size_t validity_size(size_t num_rows, bool nullable) @@ -933,7 +933,7 @@ template <> __device__ size_t row_size_functor::operator()(size_t num_rows, bool nullable) { // only returns the size of offsets and validity. the size of the actual string chars - // is tracked seperately. + // is tracked separately. auto const offset_size = sizeof(offset_type); // see note about offsets in the list_view template. return (offset_size * (num_rows + 1)) + validity_size(num_rows, nullable); @@ -1186,7 +1186,7 @@ struct get_page_nesting_size { if (page.src_col_schema != src_col_schema || page.flags & gpu::PAGEINFO_FLAGS_DICTIONARY) { return 0; } - return page.nesting[depth].size; + return page.nesting[depth].batch_size; } }; @@ -1420,11 +1420,10 @@ void reader::impl::allocate_columns(size_t skip_rows, size_t num_rows, bool uses auto& pages = _file_itm_data.pages_info; // computes: - // PageNestingInfo::size for each level of nesting, for each page, taking row bounds into account. - // PageInfo::skipped_values, which tells us where to start decoding in the input to respect the - // user bounds. - // It is only necessary to do this second pass if uses_custom_row_bounds is set (if the user has - // specified artifical bounds). + // PageNestingInfo::batch_size for each level of nesting, for each page, taking row bounds into + // account. 
PageInfo::skipped_values, which tells us where to start decoding in the input to + // respect the user bounds. It is only necessary to do this second pass if uses_custom_row_bounds + // is set (if the user has specified artifical bounds). if (uses_custom_row_bounds) { gpu::ComputePageSizes(pages, chunks, From c5e73ce5dca84881f9fff6a7102a44d5c91dd6a0 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 10 Nov 2022 14:01:27 -0800 Subject: [PATCH 139/162] Optimization: store off global nesting sizes per page so that during trim passes we only have to fully process pages that are on the edges of the skip_rows/num_rows boundary. Signed-off-by: Nghia Truong # Conflicts: # cpp/src/io/parquet/page_data.cu # cpp/tests/io/parquet_chunked_reader_test.cpp --- cpp/src/io/parquet/page_data.cu | 105 +++++++++++++------ cpp/src/io/parquet/reader_impl_preprocess.cu | 13 ++- 2 files changed, 78 insertions(+), 40 deletions(-) diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index 56351d7d45b..b67344f22f3 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -1464,9 +1464,6 @@ static __device__ void gpuUpdatePageSizes(page_state_s* s, // how many rows we've processed in the page so far int input_row_count = s->input_row_count; - // how many valid leaves we've processed - // int input_leaf_valid_count = s->input_leaf_valid_count; - while (input_value_count < target_input_value_count) { int start_depth, end_depth, d; get_nesting_bounds( @@ -1512,7 +1509,7 @@ static __device__ void gpuUpdatePageSizes(page_state_s* s, int const in_nesting_bounds = (s_idx >= start_depth && s_idx <= end_depth && in_row_bounds) ? 1 : 0; uint32_t const count_mask = ballot(in_nesting_bounds); - if (!t) { pni->size += __popc(count_mask); } + if (!t) { pni->batch_size += __popc(count_mask); } /* if (s_idx == max_depth - 1) { @@ -1532,8 +1529,7 @@ static __device__ void gpuUpdatePageSizes(page_state_s* s, if (!t) { s->input_value_count = target_input_value_count; s->input_leaf_count = input_leaf_count; - // s->input_leaf_valid_count = input_leaf_valid_count; - s->input_row_count = input_row_count; + s->input_row_count = input_row_count; } } @@ -1570,7 +1566,7 @@ __global__ void __launch_bounds__(block_size) device_span chunks, size_t min_row, size_t num_rows, - bool compute_num_rows_pass, + bool base_pass, bool compute_string_sizes) { __shared__ __align__(16) page_state_s state_g; @@ -1581,6 +1577,21 @@ __global__ void __launch_bounds__(block_size) PageInfo* pp = &pages[page_idx]; if (!setupLocalPageInfo(s, pp, chunks, min_row, num_rows, false)) { return; } + if (!t) { + s->page.skipped_values = -1; + s->page.skipped_leaf_values = 0; + s->page.str_bytes = 0; + s->input_row_count = 0; + s->input_value_count = 0; + + // in the base pass, we're computing the number of rows, make sure we visit absolutely + // everything + if (base_pass) { + s->first_row = 0; + s->num_rows = INT_MAX; + s->row_index_lower_bound = -1; + } + } // we only need to preprocess hierarchies with repetition in them (ie, hierarchies // containing lists anywhere within). @@ -1588,45 +1599,57 @@ __global__ void __launch_bounds__(block_size) compute_string_sizes = compute_string_sizes && ((s->col.data_type & 7) == BYTE_ARRAY && s->dtype_len != 4); - // reasons we might want to early out: + // various early out optimizations: + // - if this is a flat hierarchy (no lists) and is not a string column. in this case we don't need // to do // the expensive work of traversing the level data to determine sizes. 
we can just compute it // directly. - // - if this is the trim pass and we have no rows to output for this page. if (!has_repetition && !compute_string_sizes) { - if (!t) { - // note: doing this for all nesting levels because we can still have structs even if we don't - // have lists. - for (size_type idx = 0; idx < pp->num_nesting_levels; idx++) { - pp->nesting[idx].size = pp->num_input_values; + int d = 0; + while (d < s->page.num_nesting_levels) { + auto const i = d + t; + if (i < s->page.num_nesting_levels) { + if (base_pass) { pp->nesting[i].size = pp->num_input_values; } + pp->nesting[i].batch_size = pp->num_input_values; } + d += blockDim.x; + } + return; + } + + // - if this page is not at the beginning or end of the trim bounds, the batch size is + // the full page size + if (!base_pass && s->num_rows == s->page.num_rows) { + int d = 0; + while (d < s->page.num_nesting_levels) { + auto const i = d + t; + if (i < s->page.num_nesting_levels) { pp->nesting[i].batch_size = pp->nesting[i].size; } + d += blockDim.x; } return; } + // - if this page is completely trimmed, zero out sizes. + if (!base_pass && s->num_rows == 0) { + int d = 0; + while (d < s->page.num_nesting_levels) { + auto const i = d + t; + if (i < s->page.num_nesting_levels) { pp->nesting[i].batch_size = 0; } + d += blockDim.x; + } + return; + } + + // at this point we are going to be fully recomputing batch information + // zero sizes int d = 0; while (d < s->page.num_nesting_levels) { - if (d + t < s->page.num_nesting_levels) { s->page.nesting[d + t].size = 0; } + if (d + t < s->page.num_nesting_levels) { s->page.nesting[d + t].batch_size = 0; } d += blockDim.x; } - if (!t) { - s->page.skipped_values = -1; - s->page.skipped_leaf_values = 0; - s->page.str_bytes = 0; - s->input_row_count = 0; - s->input_value_count = 0; - // if we're computing the number of rows, make sure we visit absolutely everything - if (compute_num_rows_pass) { - s->first_row = 0; - s->num_rows = INT_MAX; - s->row_index_lower_bound = -1; - } - } - // if we have no work to do for this page. 
- if (!compute_num_rows_pass && s->num_rows == 0) { return; } __syncthreads(); // optimization : it might be useful to have a version of gpuDecodeStream that could go wider than @@ -1651,7 +1674,7 @@ __global__ void __launch_bounds__(block_size) : s->lvl_count[level_type::DEFINITION]; // process what we got back - gpuUpdatePageSizes(s, actual_input_count, t, !compute_num_rows_pass); + gpuUpdatePageSizes(s, actual_input_count, t, !base_pass); if (compute_string_sizes) { auto const str_len = gpuGetStringSizes(s, s->input_leaf_count, t); if (!t) { s->page.str_bytes += str_len; } @@ -1661,9 +1684,25 @@ __global__ void __launch_bounds__(block_size) __syncwarp(); } } - // update # rows in the actual page + + // update output results: + // - real number of rows for the whole page + // - nesting sizes for the whole page + // - skipped value information for trimmed pages + // - string bytes + if (base_pass) { + // nesting level 0 is the root column, so the size is also the # of rows + if (!t) { pp->num_rows = s->page.nesting[0].batch_size; } + + // store off this batch size as the "full" size + int d = 0; + while (d < s->page.num_nesting_levels) { + auto const i = d + t; + if (i < s->page.num_nesting_levels) { pp->nesting[i].size = pp->nesting[i].batch_size; } + d += blockDim.x; + } + } if (!t) { - if (compute_num_rows_pass) { pp->num_rows = s->page.nesting[0].size; } pp->skipped_values = s->page.skipped_values; pp->skipped_leaf_values = s->page.skipped_leaf_values; pp->str_bytes = s->page.str_bytes; diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index 03021e0ad28..eca08938bd2 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -933,7 +933,7 @@ template <> __device__ size_t row_size_functor::operator()(size_t num_rows, bool nullable) { // only returns the size of offsets and validity. the size of the actual string chars - // is tracked seperately. + // is tracked separately. auto const offset_size = sizeof(offset_type); // see note about offsets in the list_view template. return (offset_size * (num_rows + 1)) + validity_size(num_rows, nullable); @@ -1186,7 +1186,7 @@ struct get_page_nesting_size { if (page.src_col_schema != src_col_schema || page.flags & gpu::PAGEINFO_FLAGS_DICTIONARY) { return 0; } - return page.nesting[depth].size; + return page.nesting[depth].batch_size; } }; @@ -1420,11 +1420,10 @@ void reader::impl::allocate_columns(size_t skip_rows, size_t num_rows, bool uses auto& pages = _file_itm_data.pages_info; // computes: - // PageNestingInfo::size for each level of nesting, for each page, taking row bounds into account. - // PageInfo::skipped_values, which tells us where to start decoding in the input to respect the - // user bounds. - // It is only necessary to do this second pass if uses_custom_row_bounds is set (if the user has - // specified artifical bounds). + // PageNestingInfo::batch_size for each level of nesting, for each page, taking row bounds into + // account. PageInfo::skipped_values, which tells us where to start decoding in the input to + // respect the user bounds. It is only necessary to do this second pass if uses_custom_row_bounds + // is set (if the user has specified artifical bounds). 
if (uses_custom_row_bounds) { gpu::ComputePageSizes(pages, chunks, From 4fca0c0f4510975305a0555b29eef9ac1a972b59 Mon Sep 17 00:00:00 2001 From: Dave Baranec Date: Mon, 14 Nov 2022 12:20:45 -0600 Subject: [PATCH 140/162] Fix several warnings that show up in the spark-rapids-jni build. --- cpp/src/io/parquet/page_data.cu | 47 ++++++++++---------- cpp/src/io/parquet/reader_impl_preprocess.cu | 17 +++---- 2 files changed, 30 insertions(+), 34 deletions(-) diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index 2b8dadbb6f9..969935b5fae 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -287,7 +287,8 @@ __device__ void gpuDecodeStream( * 31) * @param[in] t Warp1 thread ID (0..31) * - * @return The new output position + * @return A pair containing the new output position, and the total length of strings decoded (this + * will only be valid on thread 0 and if sizes_only is true) */ template __device__ std::pair gpuDecodeDictionaryIndices(volatile page_state_s* s, @@ -343,9 +344,10 @@ __device__ std::pair gpuDecodeDictionaryIndices(volatile page_state_s* is_literal = shuffle(is_literal); batch_len = shuffle(batch_len); - int len = 0; + // compute dictionary index. + int dict_idx = 0; if (t < batch_len) { - int dict_idx = s->dict_val; + dict_idx = s->dict_val; if (is_literal) { int32_t ofs = (t - ((batch_len + 7) & ~7)) * dict_bits; const uint8_t* p = s->data_start + (ofs >> 3); @@ -366,27 +368,26 @@ __device__ std::pair gpuDecodeDictionaryIndices(volatile page_state_s* } } - // if we're computing indices, store it off. - if constexpr (sizes_only) { - len = [&]() { - // we may end up decoding more indices than we asked for. so don't include those in the - // size calculation - if (pos + t >= target_pos) { return 0; } - // TODO: refactor this with gpuGetStringData / gpuGetStringSize - uint32_t const dict_pos = (s->dict_bits > 0) ? dict_idx * sizeof(string_index_pair) : 0; - if (target_pos && dict_pos < (uint32_t)s->dict_size) { - const auto* src = reinterpret_cast(s->dict_base + dict_pos); - return src->second; - } - return 0; - }(); - } else { - s->dict_idx[(pos + t) & (non_zero_buffer_size - 1)] = dict_idx; - } + // if we're not computing sizes, store off the dictionary index + if constexpr (!sizes_only) { s->dict_idx[(pos + t) & (non_zero_buffer_size - 1)] = dict_idx; } } - // if we're computing sizes, sum it + // if we're computing sizes, add the length(s) if constexpr (sizes_only) { + int const len = [&]() { + if (t >= batch_len) { return 0; } + // we may end up decoding more indices than we asked for. so don't include those in the + // size calculation + if (pos + t >= target_pos) { return 0; } + // TODO: refactor this with gpuGetStringData / gpuGetStringSize + uint32_t const dict_pos = (s->dict_bits > 0) ? dict_idx * sizeof(string_index_pair) : 0; + if (target_pos && dict_pos < (uint32_t)s->dict_size) { + const auto* src = reinterpret_cast(s->dict_base + dict_pos); + return src->second; + } + return 0; + }(); + typedef cub::WarpReduce WarpReduce; __shared__ typename WarpReduce::TempStorage temp_storage; // note: str_len will only be valid on thread 0. 
@@ -1774,9 +1775,7 @@ __global__ void __launch_bounds__(block_size) gpuDecodePageData( // WARP1: Decode dictionary indices, booleans or string positions if (s->dict_base) { - auto const [new_target_pos, _] = - gpuDecodeDictionaryIndices(s, src_target_pos, t & 0x1f); - src_target_pos = new_target_pos; + src_target_pos = gpuDecodeDictionaryIndices(s, src_target_pos, t & 0x1f).first; } else if ((s->col.data_type & 7) == BOOLEAN) { src_target_pos = gpuDecodeRleBooleans(s, src_target_pos, t & 0x1f); } else if ((s->col.data_type & 7) == BYTE_ARRAY) { diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index ff274ed2384..698d91f4dc6 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -961,10 +961,6 @@ struct get_cumulative_row_info { auto iter = cudf::detail::make_counting_transform_iterator(0, [page, index] __device__(size_type i) { auto const& pni = page.nesting[i]; - if (index == 1) { - auto const size = - cudf::type_dispatcher(data_type{pni.type}, row_size_functor{}, pni.size, pni.nullable); - } return cudf::type_dispatcher( data_type{pni.type}, row_size_functor{}, pni.size, pni.nullable); }); @@ -1123,12 +1119,13 @@ std::vector compute_splits(hostdevice_vector key_offsets(page_keys.size() + 1, stream); - auto [_, key_offsets_end] = thrust::reduce_by_key(rmm::exec_policy(stream), - page_keys.begin(), - page_keys.end(), - thrust::make_constant_iterator(1), - thrust::make_discard_iterator(), - key_offsets.begin()); + auto const key_offsets_end = thrust::reduce_by_key(rmm::exec_policy(stream), + page_keys.begin(), + page_keys.end(), + thrust::make_constant_iterator(1), + thrust::make_discard_iterator(), + key_offsets.begin()) + .second; size_t const num_unique_keys = key_offsets_end - key_offsets.begin(); thrust::exclusive_scan( rmm::exec_policy(stream), key_offsets.begin(), key_offsets.end(), key_offsets.begin()); From cedcc078e5365b99527fbe654304babd4db1600f Mon Sep 17 00:00:00 2001 From: Dave Baranec Date: Mon, 14 Nov 2022 18:39:08 -0600 Subject: [PATCH 141/162] Several changes from PR review. --- cpp/src/io/parquet/page_data.cu | 33 ++++++++++++-------- cpp/src/io/parquet/reader_impl_preprocess.cu | 4 +-- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index 969935b5fae..1a363fed38e 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include #include @@ -28,9 +28,7 @@ #include #include -#include #include -#include #include #include #include @@ -291,9 +289,9 @@ __device__ void gpuDecodeStream( * will only be valid on thread 0 and if sizes_only is true) */ template -__device__ std::pair gpuDecodeDictionaryIndices(volatile page_state_s* s, - int target_pos, - int t) +__device__ cuda::std::pair gpuDecodeDictionaryIndices(volatile page_state_s* s, + int target_pos, + int t) { const uint8_t* end = s->data_end; int dict_bits = s->dict_bits; @@ -388,7 +386,7 @@ __device__ std::pair gpuDecodeDictionaryIndices(volatile page_state_s* return 0; }(); - typedef cub::WarpReduce WarpReduce; + using WarpReduce = cub::WarpReduce; __shared__ typename WarpReduce::TempStorage temp_storage; // note: str_len will only be valid on thread 0. 
str_len += WarpReduce(temp_storage).Sum(len); @@ -463,13 +461,14 @@ __device__ int gpuDecodeRleBooleans(volatile page_state_s* s, int target_pos, in } /** - * @brief Parses the length and position of strings + * @brief Parses the length and position of strings and returns total length of all strings + * processed * * @param[in,out] s Page state input/output * @param[in] target_pos Target output position * @param[in] t Thread ID * - * @return The new output position + * @return Total length of strings processed */ __device__ size_type gpuInitStringDescriptors(volatile page_state_s* s, int target_pos, int t) { @@ -504,8 +503,16 @@ __device__ size_type gpuInitStringDescriptors(volatile page_state_s* s, int targ return total_len; } -inline __device__ std::pair gpuGetStringData(volatile page_state_s* s, - int src_pos) +/** + * @brief Retrieves string information for a string at the specified source position + * + * @param[in] s Page state input + * @param[in] src_pos Source position + * + * @return A pair containing a pointer to the string and it's length + */ +inline __device__ cuda::std::pair gpuGetStringData(volatile page_state_s* s, + int src_pos) { const char* ptr = nullptr; size_t len = 0; @@ -1534,7 +1541,7 @@ static __device__ void gpuUpdatePageSizes(page_state_s* s, } } -__device__ size_type gpuGetStringSizes(page_state_s* s, int target_count, int t) +__device__ size_type gpuGetStringSize(page_state_s* s, int target_count, int t) { auto dict_target_pos = target_count; size_type str_len = 0; @@ -1678,7 +1685,7 @@ __global__ void __launch_bounds__(block_size) // process what we got back gpuUpdatePageSizes(s, actual_input_count, t, !base_pass); if (compute_string_sizes) { - auto const str_len = gpuGetStringSizes(s, s->input_leaf_count, t); + auto const str_len = gpuGetStringSize(s, s->input_leaf_count, t); if (!t) { s->page.str_bytes += str_len; } } diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index 698d91f4dc6..93c17da6d07 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -844,7 +844,7 @@ void print_cumulative_page_info(hostdevice_vector& pages, } void print_cumulative_row_info( - std::vector const& sizes, + host_span sizes, std::string const& label, std::optional> splits = std::nullopt) { @@ -950,7 +950,7 @@ __device__ size_t row_size_functor::operator()(size_t num_rows, boo struct get_cumulative_row_info { gpu::PageInfo const* const pages; - cumulative_row_info operator() __device__(size_type index) + __device__ cumulative_row_info operator()(size_type index) { auto const& page = pages[index]; if (page.flags & gpu::PAGEINFO_FLAGS_DICTIONARY) { From 73503bbf2a122cad2311b96c06fc71e661127085 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 14 Nov 2022 20:43:32 -0800 Subject: [PATCH 142/162] Fix typo Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index 0c20ee417ac..6d42e9fab84 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -78,7 +78,7 @@ class reader::impl { /** * @brief Constructor from a chunk read limit and an array of dataset sources with reader options. 
* - * By using this constructor, the reader will supports iterative (chunked) reading through + * By using this constructor, the reader will support iterative (chunked) reading through * `has_next() ` and `read_chunk()`. For example: * ``` * do { From e9905b8359c424a0d057e6a49f7120558341108b Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 14 Nov 2022 20:43:36 -0800 Subject: [PATCH 143/162] Fix test Signed-off-by: Nghia Truong --- cpp/tests/io/parquet_chunked_reader_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/tests/io/parquet_chunked_reader_test.cpp b/cpp/tests/io/parquet_chunked_reader_test.cpp index e89c055b23f..0aa5cb65d96 100644 --- a/cpp/tests/io/parquet_chunked_reader_test.cpp +++ b/cpp/tests/io/parquet_chunked_reader_test.cpp @@ -70,7 +70,7 @@ auto write_file(std::vector>& input_columns, if (nullable) { // Generate deterministic bitmask instead of random bitmask for easy computation of data size. auto const valid_iter = cudf::detail::make_counting_transform_iterator( - 0, [](cudf::size_type i) { return i % 4 == 3 ? 0 : 1; }); + 0, [](cudf::size_type i) { return i % 4 != 3; }); cudf::size_type offset{0}; for (auto& col : input_columns) { From 4072a80f4bd16f46a110092588765fbb9efab9d7 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 14 Nov 2022 20:44:02 -0800 Subject: [PATCH 144/162] Address review comments Signed-off-by: Nghia Truong --- cpp/include/cudf/io/detail/parquet.hpp | 10 +++-- cpp/include/cudf/io/parquet.hpp | 8 +++- cpp/src/io/functions.cpp | 14 ++++++- cpp/src/io/parquet/reader.cpp | 4 +- cpp/src/io/parquet/reader_impl.cpp | 51 +++++++++++++++----------- 5 files changed, 56 insertions(+), 31 deletions(-) diff --git a/cpp/include/cudf/io/detail/parquet.hpp b/cpp/include/cudf/io/detail/parquet.hpp index 31b7c71b769..7f107017864 100644 --- a/cpp/include/cudf/io/detail/parquet.hpp +++ b/cpp/include/cudf/io/detail/parquet.hpp @@ -87,7 +87,7 @@ class reader { * This class intentionally subclasses the `reader` class with private inheritance to hide the * `reader::read()` API. As such, only chunked reading APIs are supported. */ -class chunked_reader : reader { +class chunked_reader : private reader { public: /** * @brief Constructor from a read size limit and an array of data sources with reader options. @@ -119,18 +119,22 @@ class chunked_reader : reader { /** * @brief Destructor explicitly-declared to avoid inlined in header. + * + * Since the declaration of the internal `_impl` object does not exist in this header, this + * destructor needs to be defined in a separate source file which can access to that object's + * declaration. */ ~chunked_reader(); /** * @copydoc cudf::io::chunked_parquet_reader::has_next */ - bool has_next(); + [[nodiscard]] bool has_next() const; /** * @copydoc cudf::io::chunked_parquet_reader::read_chunk */ - table_with_metadata read_chunk(); + [[nodiscard]] table_with_metadata read_chunk() const; }; /** diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index b866fa00a55..f3facae098d 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -436,6 +436,10 @@ class chunked_parquet_reader { /** * @brief Destructor, destroying the internal reader instance. + * + * Since the declaration of the internal `reader` object does not exist in this header, this + * destructor needs to be defined in a separate source file which can access to that object's + * declaration. 
*/ ~chunked_parquet_reader(); @@ -444,7 +448,7 @@ class chunked_parquet_reader { * * @return A boolean value indicating if there is any data left to read */ - bool has_next(); + [[nodiscard]] bool has_next() const; /** * @brief Read a chunk of rows in the given Parquet file. @@ -457,7 +461,7 @@ class chunked_parquet_reader { * * @return An output `cudf::table` along with its metadata */ - table_with_metadata read_chunk(); + [[nodiscard]] table_with_metadata read_chunk() const; private: std::unique_ptr reader; diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp index 0933619ae87..1a5a43d2b90 100644 --- a/cpp/src/io/functions.cpp +++ b/cpp/src/io/functions.cpp @@ -523,12 +523,22 @@ chunked_parquet_reader::~chunked_parquet_reader() = default; /** * @copydoc cudf::io::chunked_parquet_reader::has_next */ -bool chunked_parquet_reader::has_next() { return reader->has_next(); } +bool chunked_parquet_reader::has_next() const +{ + CUDF_FUNC_RANGE(); + CUDF_EXPECTS(reader != nullptr, "Reader has not been constructed properly."); + return reader->has_next(); +} /** * @copydoc cudf::io::chunked_parquet_reader::read_chunk */ -table_with_metadata chunked_parquet_reader::read_chunk() { return reader->read_chunk(); } +table_with_metadata chunked_parquet_reader::read_chunk() const +{ + CUDF_FUNC_RANGE(); + CUDF_EXPECTS(reader != nullptr, "Reader has not been constructed properly."); + return reader->read_chunk(); +} /** * @copydoc cudf::io::parquet_chunked_writer::parquet_chunked_writer diff --git a/cpp/src/io/parquet/reader.cpp b/cpp/src/io/parquet/reader.cpp index de79d93cfb3..1321e8073d7 100644 --- a/cpp/src/io/parquet/reader.cpp +++ b/cpp/src/io/parquet/reader.cpp @@ -51,8 +51,8 @@ chunked_reader::chunked_reader(std::size_t chunk_read_limit, chunked_reader::~chunked_reader() = default; -bool chunked_reader::has_next() { return _impl->has_next(); } +bool chunked_reader::has_next() const { return _impl->has_next(); } -table_with_metadata chunked_reader::read_chunk() { return _impl->read_chunk(); } +table_with_metadata chunked_reader::read_chunk() const { return _impl->read_chunk(); } } // namespace cudf::io::detail::parquet diff --git a/cpp/src/io/parquet/reader_impl.cpp b/cpp/src/io/parquet/reader_impl.cpp index 9e07b62dc3b..20ae895fd03 100644 --- a/cpp/src/io/parquet/reader_impl.cpp +++ b/cpp/src/io/parquet/reader_impl.cpp @@ -141,8 +141,6 @@ void reader::impl::decode_page_data(size_t skip_rows, size_t num_rows) gpu::DecodePageData(pages, chunks, num_rows, skip_rows, _stream); - _stream.synchronize(); - pages.device_to_host(_stream); page_nesting.device_to_host(_stream); _stream.synchronize(); @@ -210,7 +208,20 @@ reader::impl::impl(std::vector>&& sources, parquet_reader_options const& options, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) - : _stream(stream), _mr(mr), _sources(std::move(sources)) + : impl(0 /*chunk_read_limit*/, + std::forward>>(sources), + options, + stream, + mr) +{ +} + +reader::impl::impl(std::size_t chunk_read_limit, + std::vector>&& sources, + parquet_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + : _stream{stream}, _mr{mr}, _sources{std::move(sources)}, _chunk_read_limit{chunk_read_limit} { // Open and parse the source dataset metadata _metadata = std::make_unique(_sources); @@ -232,25 +243,15 @@ reader::impl::impl(std::vector>&& sources, options.is_enabled_use_pandas_metadata(), _strings_to_categorical, _timestamp_type.id()); -} - -reader::impl::impl(std::size_t chunk_read_limit, - 
std::vector>&& sources, - parquet_reader_options const& options, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) - : impl(std::forward>>(sources), - options, - stream, - mr) -{ - _chunk_read_limit = chunk_read_limit; // Save the states of the output buffers for reuse in `chunk_read()`. - for (auto const& buff : _output_buffers) { - auto& new_buff = - _output_buffers_template.emplace_back(column_buffer(buff.type, buff.is_nullable)); - copy_output_buffer(buff, new_buff); + // if (_chunk_read_limit > 0) + { + for (auto const& buff : _output_buffers) { + auto& new_buff = + _output_buffers_template.emplace_back(column_buffer(buff.type, buff.is_nullable)); + copy_output_buffer(buff, new_buff); + } } } @@ -369,13 +370,19 @@ table_with_metadata reader::impl::read_chunk() copy_output_buffer(buff, new_buff); } - prepare_data(0, -1, true, {}); + prepare_data(0 /*skip_rows*/, + -1 /*num_rows, `-1` means unlimited*/, + true /*uses_custom_row_bounds*/, + {} /*row_group_indices, empty means read all row groups*/); return read_chunk_internal(true); } bool reader::impl::has_next() { - prepare_data(0, -1, true, {}); + prepare_data(0 /*skip_rows*/, + -1 /*num_rows, `-1` means unlimited*/, + true /*uses_custom_row_bounds*/, + {} /*row_group_indices, empty means read all row groups*/); return _current_read_chunk < _chunk_read_info.size(); } From 95a97fb8af8e2395a62cd2b6371573d1abfc901f Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 14 Nov 2022 20:45:34 -0800 Subject: [PATCH 145/162] Small optimization Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl.cpp b/cpp/src/io/parquet/reader_impl.cpp index 20ae895fd03..4f7eb3db7b3 100644 --- a/cpp/src/io/parquet/reader_impl.cpp +++ b/cpp/src/io/parquet/reader_impl.cpp @@ -245,8 +245,8 @@ reader::impl::impl(std::size_t chunk_read_limit, _timestamp_type.id()); // Save the states of the output buffers for reuse in `chunk_read()`. - // if (_chunk_read_limit > 0) - { + // Don't need to do it if we read the file all at once. + if (_chunk_read_limit > 0) { for (auto const& buff : _output_buffers) { auto& new_buff = _output_buffers_template.emplace_back(column_buffer(buff.type, buff.is_nullable)); @@ -364,10 +364,13 @@ table_with_metadata reader::impl::read(size_type skip_rows, table_with_metadata reader::impl::read_chunk() { // Reset the output buffers to their original states (right after reader construction). - _output_buffers.resize(0); - for (auto const& buff : _output_buffers_template) { - auto& new_buff = _output_buffers.emplace_back(column_buffer(buff.type, buff.is_nullable)); - copy_output_buffer(buff, new_buff); + // Don't need to do it if we read the file all at once. 
+ if (_chunk_read_limit > 0) { + _output_buffers.resize(0); + for (auto const& buff : _output_buffers_template) { + auto& new_buff = _output_buffers.emplace_back(column_buffer(buff.type, buff.is_nullable)); + copy_output_buffer(buff, new_buff); + } } prepare_data(0 /*skip_rows*/, From 8390d4b9141fbb74ea8fd85de8ee9bc9fe5b627c Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 14 Nov 2022 20:57:48 -0800 Subject: [PATCH 146/162] Implement `column_buffer::empty_like` Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl.cpp | 29 ++------------------------ cpp/src/io/utilities/column_buffer.cpp | 27 ++++++++++++++++++++++++ cpp/src/io/utilities/column_buffer.hpp | 6 +++++- 3 files changed, 34 insertions(+), 28 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl.cpp b/cpp/src/io/parquet/reader_impl.cpp index 4f7eb3db7b3..d52288e9398 100644 --- a/cpp/src/io/parquet/reader_impl.cpp +++ b/cpp/src/io/parquet/reader_impl.cpp @@ -22,28 +22,6 @@ namespace cudf::io::detail::parquet { -namespace { - -/** - * @brief Recursively copy the output buffer from one to another. - * - * This only copies `name` and `user_data` fields, which are generated during reader construction. - * - * @param buff The old output buffer - * @param new_buff The new output buffer - */ -void copy_output_buffer(column_buffer const& buff, column_buffer& new_buff) -{ - new_buff.name = buff.name; - new_buff.user_data = buff.user_data; - for (auto const& child : buff.children) { - auto& new_child = new_buff.children.emplace_back(column_buffer(child.type, child.is_nullable)); - copy_output_buffer(child, new_child); - } -} - -} // namespace - void reader::impl::decode_page_data(size_t skip_rows, size_t num_rows) { auto& chunks = _file_itm_data.chunks; @@ -248,9 +226,7 @@ reader::impl::impl(std::size_t chunk_read_limit, // Don't need to do it if we read the file all at once. if (_chunk_read_limit > 0) { for (auto const& buff : _output_buffers) { - auto& new_buff = - _output_buffers_template.emplace_back(column_buffer(buff.type, buff.is_nullable)); - copy_output_buffer(buff, new_buff); + _output_buffers_template.emplace_back(column_buffer::empty_like(buff)); } } } @@ -368,8 +344,7 @@ table_with_metadata reader::impl::read_chunk() if (_chunk_read_limit > 0) { _output_buffers.resize(0); for (auto const& buff : _output_buffers_template) { - auto& new_buff = _output_buffers.emplace_back(column_buffer(buff.type, buff.is_nullable)); - copy_output_buffer(buff, new_buff); + _output_buffers.emplace_back(column_buffer::empty_like(buff)); } } diff --git a/cpp/src/io/utilities/column_buffer.cpp b/cpp/src/io/utilities/column_buffer.cpp index de145486662..89ba5c598e8 100644 --- a/cpp/src/io/utilities/column_buffer.cpp +++ b/cpp/src/io/utilities/column_buffer.cpp @@ -55,6 +55,33 @@ void column_buffer::create(size_type _size, } } +namespace { + +/** + * @brief Recursively copy `name` and `user_data` fields of one buffer to another. 
+ * + * @param buff The old output buffer + * @param new_buff The new output buffer + */ +void copy_buffer_data(column_buffer const& buff, column_buffer& new_buff) +{ + new_buff.name = buff.name; + new_buff.user_data = buff.user_data; + for (auto const& child : buff.children) { + auto& new_child = new_buff.children.emplace_back(column_buffer(child.type, child.is_nullable)); + copy_buffer_data(child, new_child); + } +} + +} // namespace + +column_buffer column_buffer::empty_like(column_buffer const& input) +{ + auto new_buff = column_buffer(input.type, input.is_nullable); + copy_buffer_data(input, new_buff); + return new_buff; +} + /** * @copydoc cudf::io::detail::make_column */ diff --git a/cpp/src/io/utilities/column_buffer.hpp b/cpp/src/io/utilities/column_buffer.hpp index 8ae3d39a3ba..8f181157fae 100644 --- a/cpp/src/io/utilities/column_buffer.hpp +++ b/cpp/src/io/utilities/column_buffer.hpp @@ -104,10 +104,14 @@ struct column_buffer { { return static_cast(_null_mask.data()); } - auto null_mask_size() { return _null_mask.size(); }; + auto null_mask_size() { return _null_mask.size(); } auto& null_count() { return _null_count; } + // Create a new column_buffer that has empty data but with the same basic information as the + // input column, including same type, nullability, name, and user_data. + static column_buffer empty_like(column_buffer const& input); + std::unique_ptr> _strings; rmm::device_buffer _data{}; rmm::device_buffer _null_mask{}; From 5035bf43fac31ec53e83869018c4f54383d20cbf Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 14 Nov 2022 21:10:55 -0800 Subject: [PATCH 147/162] Optimize unit tests: Only call `cudf::concatenate` once Signed-off-by: Nghia Truong --- cpp/tests/io/parquet_chunked_reader_test.cpp | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/cpp/tests/io/parquet_chunked_reader_test.cpp b/cpp/tests/io/parquet_chunked_reader_test.cpp index 0aa5cb65d96..f9afd369d44 100644 --- a/cpp/tests/io/parquet_chunked_reader_test.cpp +++ b/cpp/tests/io/parquet_chunked_reader_test.cpp @@ -123,22 +123,27 @@ auto chunked_read(std::string const& filepath, std::size_t byte_limit) auto reader = cudf::io::chunked_parquet_reader(byte_limit, read_opts); auto num_chunks = 0; - auto result = std::make_unique(); + auto out_tables = std::vector>{}; do { auto chunk = reader.read_chunk(); - if (num_chunks == 0) { - result = std::move(chunk.tbl); - } else { + // If the input file is empty, the first call to `read_chunk` will return an empty table. + // Thus, we only check for non-empty output table from the second call. 
+ if (num_chunks > 0) { CUDF_EXPECTS(chunk.tbl->num_rows() != 0, "Number of rows in the new chunk is zero."); - result = cudf::concatenate(std::vector{result->view(), chunk.tbl->view()}); } ++num_chunks; + out_tables.emplace_back(std::move(chunk.tbl)); - if (result->num_rows() == 0) { break; } + if (out_tables.back()->num_rows() == 0) { break; } } while (reader.has_next()); - return std::pair(std::move(result), num_chunks); + auto out_tviews = std::vector{}; + for (auto const& tbl : out_tables) { + out_tviews.emplace_back(tbl->view()); + } + + return std::pair(cudf::concatenate(out_tviews), num_chunks); } } // namespace From 912d86c8973a4ab60d4f04f491927c4320c29de5 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 15 Nov 2022 11:34:55 -0800 Subject: [PATCH 148/162] Remove redundant check Signed-off-by: Nghia Truong --- cpp/tests/io/parquet_chunked_reader_test.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpp/tests/io/parquet_chunked_reader_test.cpp b/cpp/tests/io/parquet_chunked_reader_test.cpp index f9afd369d44..f60186427cf 100644 --- a/cpp/tests/io/parquet_chunked_reader_test.cpp +++ b/cpp/tests/io/parquet_chunked_reader_test.cpp @@ -134,8 +134,6 @@ auto chunked_read(std::string const& filepath, std::size_t byte_limit) } ++num_chunks; out_tables.emplace_back(std::move(chunk.tbl)); - - if (out_tables.back()->num_rows() == 0) { break; } } while (reader.has_next()); auto out_tviews = std::vector{}; From bb2e26e84d0e5687984ffa22054e5f29d33483be Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 15 Nov 2022 15:20:46 -0800 Subject: [PATCH 149/162] Fix `cudaLaunchKernel` error in `DecodePageData` Signed-off-by: Nghia Truong --- cpp/src/io/parquet/page_data.cu | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index 1a363fed38e..f85205cefa7 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -1903,6 +1903,8 @@ void __host__ DecodePageData(hostdevice_vector& pages, size_t min_row, rmm::cuda_stream_view stream) { + if (pages.size() == 0) { return; } + dim3 dim_block(block_size, 1); dim3 dim_grid(pages.size(), 1); // 1 threadblock per page From 36c7ec256743e8aa5e58eb62caf529d065d32e48 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 15 Nov 2022 15:26:36 -0800 Subject: [PATCH 150/162] Add assertion to make sure not to decode/parse empty page array Signed-off-by: Nghia Truong --- cpp/src/io/parquet/page_data.cu | 2 +- cpp/src/io/parquet/reader_impl.cpp | 3 +++ cpp/src/io/parquet/reader_impl_preprocess.cu | 3 +++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index f85205cefa7..f88441682f5 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -1903,7 +1903,7 @@ void __host__ DecodePageData(hostdevice_vector& pages, size_t min_row, rmm::cuda_stream_view stream) { - if (pages.size() == 0) { return; } + CUDF_EXPECTS(pages.size() > 0, "There is no page to decode"); dim3 dim_block(block_size, 1); dim3 dim_grid(pages.size(), 1); // 1 threadblock per page diff --git a/cpp/src/io/parquet/reader_impl.cpp b/cpp/src/io/parquet/reader_impl.cpp index d52288e9398..84d8cfc273f 100644 --- a/cpp/src/io/parquet/reader_impl.cpp +++ b/cpp/src/io/parquet/reader_impl.cpp @@ -28,6 +28,9 @@ void reader::impl::decode_page_data(size_t skip_rows, size_t num_rows) auto& pages = _file_itm_data.pages_info; auto& page_nesting = _file_itm_data.page_nesting_info; + // Should not reach here if there is 
no page data. + CUDF_EXPECTS(pages.size() > 0, "There is no page to decode"); + size_t const sum_max_depths = std::accumulate( chunks.begin(), chunks.end(), 0, [&](size_t cursum, gpu::ColumnChunkDesc const& chunk) { return cursum + _metadata->get_output_nesting_depth(chunk.src_col_schema); diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index 93c17da6d07..360c59c5c22 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -1418,6 +1418,9 @@ void reader::impl::allocate_columns(size_t skip_rows, size_t num_rows, bool uses auto const& chunks = _file_itm_data.chunks; auto& pages = _file_itm_data.pages_info; + // Should not reach here if there is no page data. + CUDF_EXPECTS(pages.size() > 0, "There is no page to parse"); + // computes: // PageNestingInfo::batch_size for each level of nesting, for each page, taking row bounds into // account. PageInfo::skipped_values, which tells us where to start decoding in the input to From 7203c6707c9b5d336fed70054bab174fb036e5dd Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 16 Nov 2022 06:47:02 -0800 Subject: [PATCH 151/162] Address some review comments Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl_preprocess.cu | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index 360c59c5c22..c189e26b3bf 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -170,7 +170,7 @@ void generate_depth_remappings(std::map, std::ve } /** - * @brief Function that returns the required the number of bits to store a value. + * @brief Return the required number of bits to store a value. */ template [[nodiscard]] T required_bits(uint32_t max_level) @@ -1050,7 +1050,7 @@ std::vector find_splits(std::vector c // the list twice. so we have to iterate until we skip past all of them. The idea is that we // either do this, or we have to call unique() on the input first. while (p < (static_cast(sizes.size()) - 1) && - (sizes[p].row_count == cur_row_count || p < 0)) { + (p < 0 || sizes[p].row_count == cur_row_count)) { p++; } @@ -1342,7 +1342,7 @@ void reader::impl::preprocess_pages(size_t skip_rows, // we will be applying a later "trim" pass if skip_rows/num_rows is being used, which can happen // if: // - user has passed custom row bounds - // - if we will be doing a chunked read + // - we will be doing a chunked read gpu::ComputePageSizes(pages, chunks, 0, // 0-max size_t. process all possible rows @@ -1403,7 +1403,9 @@ void reader::impl::preprocess_pages(size_t skip_rows, // retrieve pages back pages.device_to_host(_stream, true); - // print_pages(pages, _stream); +#if defined(PREPROCESS_DEBUG) + print_pages(pages, _stream); +#endif } // compute splits if necessary. 
otherwise retun a single split representing @@ -1434,7 +1436,9 @@ void reader::impl::allocate_columns(size_t skip_rows, size_t num_rows, bool uses false, // num_rows is already computed false, // no need to compute string sizes _stream); - // print_pages(pages, _stream); +#if defined(PREPROCESS_DEBUG) + print_pages(pages, _stream); +#end } // iterate over all input columns and allocate any associated output From 96eed8e78d641b223b55f8cad357210b539a3596 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 16 Nov 2022 10:04:12 -0800 Subject: [PATCH 152/162] Fix `#endif` Signed-off-by: Nghia Truong --- cpp/src/io/parquet/reader_impl_preprocess.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index c189e26b3bf..7fc2ffed3ea 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -1438,7 +1438,7 @@ void reader::impl::allocate_columns(size_t skip_rows, size_t num_rows, bool uses _stream); #if defined(PREPROCESS_DEBUG) print_pages(pages, _stream); -#end +#endif } // iterate over all input columns and allocate any associated output From 70f4fdeed4cb49877b8860ceecb65581b2f806b0 Mon Sep 17 00:00:00 2001 From: Dave Baranec Date: Thu, 17 Nov 2022 10:20:07 -0600 Subject: [PATCH 153/162] PR review changes. Updated some incorrect/incomplete function docs. --- cpp/src/io/parquet/page_data.cu | 34 +++++++++++--------- cpp/src/io/parquet/reader_impl_preprocess.cu | 33 ++++++++++--------- 2 files changed, 36 insertions(+), 31 deletions(-) diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index f88441682f5..c580aa5bbc0 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -509,7 +509,7 @@ __device__ size_type gpuInitStringDescriptors(volatile page_state_s* s, int targ * @param[in] s Page state input * @param[in] src_pos Source position * - * @return A pair containing a pointer to the string and it's length + * @return A pair containing a pointer to the string and its length */ inline __device__ cuda::std::pair gpuGetStringData(volatile page_state_s* s, int src_pos) @@ -877,15 +877,17 @@ static __device__ void gpuOutputGeneric(volatile page_state_s* s, * @param[in, out] s The local page state to be filled in * @param[in] p The global page to be copied from * @param[in] chunks The global list of chunks - * @param[in] num_rows Maximum number of rows to read * @param[in] min_row Crop all rows below min_row + * @param[in] num_rows Maximum number of rows to read + * @param[in] is_decode_step If we are setting up for the decode step (instead of the preprocess + * step) */ static __device__ bool setupLocalPageInfo(page_state_s* const s, PageInfo const* p, device_span chunks, size_t min_row, size_t num_rows, - bool decode_step) + bool is_decode_step) { int t = threadIdx.x; int chunk_idx; @@ -1016,7 +1018,7 @@ static __device__ bool setupLocalPageInfo(page_state_s* const s, // NOTE: in a chunked read situation, s->col.column_data_base and s->col.valid_map_base // will be aliased to memory that has been freed when we get here in the non-decode step, so // we cannot check against nullptr. we'll just check a flag directly. 
- if (decode_step) { + if (is_decode_step) { int max_depth = s->col.max_nesting_depth; for (int idx = 0; idx < max_depth; idx++) { PageNestingInfo* pni = &s->page.nesting[idx]; @@ -1138,7 +1140,7 @@ static __device__ bool setupLocalPageInfo(page_state_s* const s, // if we're in the decoding step, jump directly to the first // value we care about - if (decode_step) { + if (is_decode_step) { s->input_value_count = s->page.skipped_values > -1 ? s->page.skipped_values : 0; } else { s->input_value_count = 0; @@ -1564,17 +1566,19 @@ __device__ size_type gpuGetStringSize(page_state_s* s, int target_count, int t) * @param pages List of pages * @param chunks List of column chunks * @param min_row Row index to start reading at - * @param num_rows Maximum number of rows to read. Pass as INT_MAX to guarantee reading all rows. - * @param trim_pass Whether or not this is the trim pass. We first have to compute + * @param num_rows Maximum number of rows to read. Pass as INT_MAX to guarantee reading all rows + * @param is_base_pass Whether or not this is the base pass. We first have to compute * the full size information of every page before we come through in a second (trim) pass - * to determine what subset of rows in this page we should be reading. + * to determine what subset of rows in this page we should be reading + * @param compute_string_sizes Whether or not we should be computing string sizes + * (PageInfo::str_bytes) as part of the pass */ __global__ void __launch_bounds__(block_size) gpuComputePageSizes(PageInfo* pages, device_span chunks, size_t min_row, size_t num_rows, - bool base_pass, + bool is_base_pass, bool compute_string_sizes) { __shared__ __align__(16) page_state_s state_g; @@ -1595,7 +1599,7 @@ __global__ void __launch_bounds__(block_size) // in the base pass, we're computing the number of rows, make sure we visit absolutely // everything - if (base_pass) { + if (is_base_pass) { s->first_row = 0; s->num_rows = INT_MAX; s->row_index_lower_bound = -1; @@ -1619,7 +1623,7 @@ __global__ void __launch_bounds__(block_size) while (d < s->page.num_nesting_levels) { auto const i = d + t; if (i < s->page.num_nesting_levels) { - if (base_pass) { pp->nesting[i].size = pp->num_input_values; } + if (is_base_pass) { pp->nesting[i].size = pp->num_input_values; } pp->nesting[i].batch_size = pp->num_input_values; } d += blockDim.x; @@ -1629,7 +1633,7 @@ __global__ void __launch_bounds__(block_size) // - if this page is not at the beginning or end of the trim bounds, the batch size is // the full page size - if (!base_pass && s->num_rows == s->page.num_rows) { + if (!is_base_pass && s->num_rows == s->page.num_rows) { int d = 0; while (d < s->page.num_nesting_levels) { auto const i = d + t; @@ -1640,7 +1644,7 @@ __global__ void __launch_bounds__(block_size) } // - if this page is completely trimmed, zero out sizes. 
- if (!base_pass && s->num_rows == 0) { + if (!is_base_pass && s->num_rows == 0) { int d = 0; while (d < s->page.num_nesting_levels) { auto const i = d + t; @@ -1683,7 +1687,7 @@ __global__ void __launch_bounds__(block_size) : s->lvl_count[level_type::DEFINITION]; // process what we got back - gpuUpdatePageSizes(s, actual_input_count, t, !base_pass); + gpuUpdatePageSizes(s, actual_input_count, t, !is_base_pass); if (compute_string_sizes) { auto const str_len = gpuGetStringSize(s, s->input_leaf_count, t); if (!t) { s->page.str_bytes += str_len; } @@ -1699,7 +1703,7 @@ __global__ void __launch_bounds__(block_size) // - nesting sizes for the whole page // - skipped value information for trimmed pages // - string bytes - if (base_pass) { + if (is_base_pass) { // nesting level 0 is the root column, so the size is also the # of rows if (!t) { pp->num_rows = s->page.nesting[0].batch_size; } diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index 7fc2ffed3ea..65beca3ba05 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -988,9 +988,9 @@ struct get_cumulative_row_info { * page. Essentially, a conservative over-estimate of the real size. */ struct row_total_size { - cumulative_row_info const* const c_info; - size_type const* const key_offsets; - size_t const num_keys; + cumulative_row_info const* c_info; + size_type const* key_offsets; + size_t num_keys; __device__ cumulative_row_info operator()(cumulative_row_info const& i) { @@ -1035,13 +1035,14 @@ std::vector find_splits(std::vector c }); auto end = start + sizes.size(); while (cur_row_count < num_rows) { - int64_t p = thrust::lower_bound(thrust::seq, start + cur_pos, end, chunk_read_limit) - start; + int64_t split_pos = + thrust::lower_bound(thrust::seq, start + cur_pos, end, chunk_read_limit) - start; // if we're past the end, or if the returned bucket is > than the chunk_read_limit, move back // one. - if (static_cast(p) >= sizes.size() || - (sizes[p].size_bytes - cur_cumulative_size > chunk_read_limit)) { - p--; + if (static_cast(split_pos) >= sizes.size() || + (sizes[split_pos].size_bytes - cur_cumulative_size > chunk_read_limit)) { + split_pos--; } // best-try. if we can't find something that'll fit, we have to go bigger. we're doing this in @@ -1049,16 +1050,16 @@ std::vector find_splits(std::vector c // so if we had two columns, both of which had an entry {1000, 10000}, that entry would be in // the list twice. so we have to iterate until we skip past all of them. The idea is that we // either do this, or we have to call unique() on the input first. 
- while (p < (static_cast(sizes.size()) - 1) && - (p < 0 || sizes[p].row_count == cur_row_count)) { - p++; + while (split_pos < (static_cast(sizes.size()) - 1) && + (split_pos < 0 || sizes[split_pos].row_count == cur_row_count)) { + split_pos++; } auto const start_row = cur_row_count; - cur_row_count = sizes[p].row_count; + cur_row_count = sizes[split_pos].row_count; splits.push_back(gpu::chunk_read_info{start_row, cur_row_count - start_row}); - cur_pos = p; - cur_cumulative_size = sizes[p].size_bytes; + cur_pos = split_pos; + cur_cumulative_size = sizes[split_pos].size_bytes; } } // print_cumulative_row_info(sizes, "adjusted", splits); @@ -1227,7 +1228,7 @@ struct start_offset_output_iterator { using reference = size_type&; using iterator_category = thrust::output_device_iterator_tag; - __host__ __device__ void operator=(start_offset_output_iterator const& other) + constexpr void operator=(start_offset_output_iterator const& other) { pages = other.pages; page_indices = other.page_indices; @@ -1236,13 +1237,13 @@ struct start_offset_output_iterator { nesting_depth = other.nesting_depth; } - __host__ __device__ start_offset_output_iterator operator+(int i) + constexpr start_offset_output_iterator operator+(int i) { return start_offset_output_iterator{ pages, page_indices, cur_index + i, src_col_schema, nesting_depth}; } - __host__ __device__ void operator++() { cur_index++; } + constexpr void operator++() { cur_index++; } __device__ reference operator[](int i) { return dereference(cur_index + i); } __device__ reference operator*() { return dereference(cur_index); } From 4547483b23e4cc84fdb4e02772d82ceafadb085e Mon Sep 17 00:00:00 2001 From: Dave Baranec Date: Thu, 17 Nov 2022 11:26:05 -0600 Subject: [PATCH 154/162] Made the logic in the row_total_size functor much more readable. --- cpp/src/io/parquet/reader_impl_preprocess.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index 65beca3ba05..b1c38c12318 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -1000,9 +1000,9 @@ struct row_total_size { auto const start = key_offsets[idx]; auto const end = key_offsets[idx + 1]; auto iter = cudf::detail::make_counting_transform_iterator( - 0, [&] __device__(size_type i) { return c_info[start + i].row_count; }); + 0, [&] __device__(size_type i) { return c_info[i].row_count; }); auto const page_index = - (thrust::lower_bound(thrust::seq, iter, iter + (end - start), i.row_count) - iter) + start; + thrust::lower_bound(thrust::seq, iter + start, iter + end, i.row_count) - iter; sum += c_info[page_index].size_bytes; } return {i.row_count, sum, i.key}; From db21bc31e357a3d2bd56e97f4596256cb78a2147 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 17 Nov 2022 11:43:13 -0800 Subject: [PATCH 155/162] Fix the tests Signed-off-by: Nghia Truong --- cpp/tests/io/parquet_chunked_reader_test.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/cpp/tests/io/parquet_chunked_reader_test.cpp b/cpp/tests/io/parquet_chunked_reader_test.cpp index f60186427cf..76a65857e6f 100644 --- a/cpp/tests/io/parquet_chunked_reader_test.cpp +++ b/cpp/tests/io/parquet_chunked_reader_test.cpp @@ -91,13 +91,9 @@ auto write_file(std::vector>& input_columns, } } - // Can't use `cudf::detail::purge_nonempty_nulls` since it requires to be compiled with CUDA. 
- if (col_typeid == cudf::type_id::LIST) { - col = cudf::purge_nonempty_nulls(cudf::lists_column_view{col->view()}); - } else if (col_typeid == cudf::type_id::STRUCT) { - col = cudf::purge_nonempty_nulls(cudf::structs_column_view{col->view()}); - } else if (col_typeid == cudf::type_id::STRING) { - col = cudf::purge_nonempty_nulls(cudf::strings_column_view{col->view()}); + if (col_typeid == cudf::type_id::LIST || col_typeid == cudf::type_id::STRUCT || + col_typeid == cudf::type_id::STRING) { + col = cudf::purge_nonempty_nulls(col->view()); } } } From 36043d84a31f4933fc590d66c2234ad259f0ba6a Mon Sep 17 00:00:00 2001 From: Dave Baranec Date: Thu, 17 Nov 2022 14:39:35 -0600 Subject: [PATCH 156/162] Variable renaming for clarity. --- cpp/src/io/parquet/reader_impl_preprocess.cu | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index b1c38c12318..38fce7d3263 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -1141,23 +1141,23 @@ std::vector compute_splits(hostdevice_vector adjusted(c_info.size(), stream); + rmm::device_uvector aggregated_info(c_info.size(), stream); thrust::transform(rmm::exec_policy(stream), c_info_sorted.begin(), c_info_sorted.end(), - adjusted.begin(), + aggregated_info.begin(), row_total_size{c_info.data(), key_offsets.data(), num_unique_keys}); // bring back to the cpu - std::vector h_adjusted(adjusted.size()); - cudaMemcpyAsync(h_adjusted.data(), - adjusted.data(), + std::vector h_aggregated_info(aggregated_info.size()); + cudaMemcpyAsync(h_aggregated_info.data(), + aggregated_info.data(), sizeof(cumulative_row_info) * c_info.size(), cudaMemcpyDeviceToHost, stream); stream.synchronize(); - return find_splits(h_adjusted, num_rows, chunk_read_limit); + return find_splits(h_aggregated_info, num_rows, chunk_read_limit); } struct get_page_chunk_idx { From 52f3b4e8db7063f7a2efc1f010c84dd4e5023299 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 17 Nov 2022 20:49:54 -0800 Subject: [PATCH 157/162] Check for null handle Signed-off-by: Nghia Truong --- .../main/java/ai/rapids/cudf/ParquetChunkedReader.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java b/java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java index 360532342b7..ea16edcbc20 100644 --- a/java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java +++ b/java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java @@ -50,6 +50,10 @@ public ParquetChunkedReader(long chunkSizeByteLimit, File filePath) { public ParquetChunkedReader(long chunkSizeByteLimit, ParquetOptions opts, File filePath) { handle = create(chunkSizeByteLimit, opts.getIncludeColumnNames(), opts.getReadBinaryAsString(), filePath.getAbsolutePath(), 0, 0, opts.timeUnit().typeId.getNativeId()); + + if(handle == 0) { + throw new IllegalStateException("Cannot create native chunked Parquet reader object."); + } } /** @@ -66,6 +70,10 @@ public ParquetChunkedReader(long chunkSizeByteLimit, ParquetOptions opts, HostMe long offset, long len) { handle = create(chunkSizeByteLimit, opts.getIncludeColumnNames(), opts.getReadBinaryAsString(), null, buffer.getAddress() + offset, len, opts.timeUnit().typeId.getNativeId()); + + if(handle == 0) { + throw new IllegalStateException("Cannot create native chunked Parquet reader object."); + } } /** From a3f467b88afd8e19eaec1b230d0e0af63afabf39 Mon Sep 17 00:00:00 2001 
From: Nghia Truong Date: Thu, 17 Nov 2022 20:52:04 -0800 Subject: [PATCH 158/162] Add comments for unused variable Signed-off-by: Nghia Truong --- java/src/main/native/src/ChunkedReaderJni.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/java/src/main/native/src/ChunkedReaderJni.cpp b/java/src/main/native/src/ChunkedReaderJni.cpp index 60da2a2cd53..222b1da9790 100644 --- a/java/src/main/native/src/ChunkedReaderJni.cpp +++ b/java/src/main/native/src/ChunkedReaderJni.cpp @@ -60,7 +60,12 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ParquetChunkedReader_create( } cudf::jni::native_jstringArray n_filter_col_names(env, filter_col_names); - cudf::jni::native_jbooleanArray n_col_binary_read(env, j_col_binary_read); /// << TODO + + // TODO: This variable is unused now, but we still don't know what to do with it yet. + // As such, it needs to stay here for a little more time before we decide to use it again, + // or remove it completely. + cudf::jni::native_jbooleanArray n_col_binary_read(env, j_col_binary_read); + (void)n_col_binary_read; auto const source = read_buffer ? cudf::io::source_info(reinterpret_cast(buffer), From 8e84cfd7f89ed45ac83c9f9038d2139ca0b1bb44 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 17 Nov 2022 20:53:14 -0800 Subject: [PATCH 159/162] Fix comment Signed-off-by: Nghia Truong --- java/src/main/native/src/ChunkedReaderJni.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/src/main/native/src/ChunkedReaderJni.cpp b/java/src/main/native/src/ChunkedReaderJni.cpp index 222b1da9790..ef19e12b99e 100644 --- a/java/src/main/native/src/ChunkedReaderJni.cpp +++ b/java/src/main/native/src/ChunkedReaderJni.cpp @@ -56,7 +56,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ParquetChunkedReader_create( cudf::jni::auto_set_device(env); cudf::jni::native_jstring filename(env, inp_file_path); if (!read_buffer && filename.is_empty()) { - JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", "inp_file_path can't be empty", 0); + JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", "inp_file_path cannot be empty", 0); } cudf::jni::native_jstringArray n_filter_col_names(env, filter_col_names); From a48c9a3dea1be2612452e1c894ac45d958faff1b Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 17 Nov 2022 20:57:07 -0800 Subject: [PATCH 160/162] Change header include style Signed-off-by: Nghia Truong --- java/src/main/native/src/ChunkedReaderJni.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/java/src/main/native/src/ChunkedReaderJni.cpp b/java/src/main/native/src/ChunkedReaderJni.cpp index ef19e12b99e..553ec46d569 100644 --- a/java/src/main/native/src/ChunkedReaderJni.cpp +++ b/java/src/main/native/src/ChunkedReaderJni.cpp @@ -21,9 +21,8 @@ #include #include -#include "../include/jni_utils.hpp" - #include "cudf_jni_apis.hpp" +#include "jni_utils.hpp" // This function is defined in `TableJni.cpp`. 
jlongArray From 3bbf5d02e218d63def736054346a0c1ef287b95f Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 17 Nov 2022 21:59:23 -0700 Subject: [PATCH 161/162] Update java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java Co-authored-by: Gera Shegalov --- java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java b/java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java index ea16edcbc20..2fca446e4e7 100644 --- a/java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java +++ b/java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java @@ -101,11 +101,7 @@ public boolean hasNext() { */ public Table readChunk() { long[] columnPtrs = readChunk(handle); - if (columnPtrs == null) { - return null; - } else { - return new Table(columnPtrs); - } + return columnPtrs != null ? new Table(columnPtrs) : null; } @Override From 65917045019346bb88f77eb3fe3c2970cb1b46f2 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 18 Nov 2022 06:19:27 -0800 Subject: [PATCH 162/162] Check for null handle Signed-off-by: Nghia Truong --- .../main/java/ai/rapids/cudf/ParquetChunkedReader.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java b/java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java index 2fca446e4e7..c34336ac73f 100644 --- a/java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java +++ b/java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java @@ -82,6 +82,10 @@ public ParquetChunkedReader(long chunkSizeByteLimit, ParquetOptions opts, HostMe * @return A boolean value indicating if there is more data to read from file. */ public boolean hasNext() { + if(handle == 0) { + throw new IllegalStateException("Native chunked Parquet reader object may have been closed."); + } + if (firstCall) { // This function needs to return true at least once, so an empty table // (but having empty columns instead of no column) can be returned by readChunk() @@ -100,6 +104,10 @@ public boolean hasNext() { * @return A table of new rows reading from the given file. */ public Table readChunk() { + if(handle == 0) { + throw new IllegalStateException("Native chunked Parquet reader object may have been closed."); + } + long[] columnPtrs = readChunk(handle); return columnPtrs != null ? new Table(columnPtrs) : null; }