diff --git a/cpp/benchmarks/io/csv/csv_reader.cpp b/cpp/benchmarks/io/csv/csv_reader.cpp index c50f5220200..6f5e7160cd3 100644 --- a/cpp/benchmarks/io/csv/csv_reader.cpp +++ b/cpp/benchmarks/io/csv/csv_reader.cpp @@ -52,6 +52,7 @@ void BM_csv_read_varying_input(benchmark::State& state) auto mem_stats_logger = cudf::memory_stats_logger(); for (auto _ : state) { + try_drop_l3_cache(); cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 cudf_io::read_csv(read_options); } @@ -98,6 +99,7 @@ void BM_csv_read_varying_options(benchmark::State& state) cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks; auto mem_stats_logger = cudf::memory_stats_logger(); for (auto _ : state) { + try_drop_l3_cache(); cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 for (int32_t chunk = 0; chunk < num_chunks; ++chunk) { // only read the header in the first chunk diff --git a/cpp/benchmarks/io/cuio_common.cpp b/cpp/benchmarks/io/cuio_common.cpp index afe0cc77a4c..7d356263220 100644 --- a/cpp/benchmarks/io/cuio_common.cpp +++ b/cpp/benchmarks/io/cuio_common.cpp @@ -141,3 +141,31 @@ std::vector segments_in_chunk(int num_segments, int num_chunks, return selected_segments; } + +// Executes the command and returns stderr output +std::string exec_cmd(std::string_view cmd) +{ + // Switch stderr and stdout to only capture stderr + auto const redirected_cmd = std::string{"( "}.append(cmd).append(" 3>&2 2>&1 1>&3) 2>/dev/null"); + std::unique_ptr pipe(popen(redirected_cmd.c_str(), "r"), pclose); + CUDF_EXPECTS(pipe != nullptr, "popen() failed"); + + std::array buffer; + std::string error_out; + while (fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr) { + error_out += buffer.data(); + } + return error_out; +} + +void try_drop_l3_cache() +{ + static bool is_drop_cache_enabled = std::getenv("CUDF_BENCHMARK_DROP_CACHE") != nullptr; + if (not is_drop_cache_enabled) { return; } + + std::array drop_cache_cmds{"/sbin/sysctl 
vm.drop_caches=3", "sudo /sbin/sysctl vm.drop_caches=3"}; CUDF_EXPECTS(std::any_of(drop_cache_cmds.cbegin(), + drop_cache_cmds.cend(), + [](auto& cmd) { return exec_cmd(cmd).empty(); }), + "Failed to execute the drop cache command"); +} diff --git a/cpp/benchmarks/io/cuio_common.hpp b/cpp/benchmarks/io/cuio_common.hpp index 2ed534d5333..ff900d20e6f 100644 --- a/cpp/benchmarks/io/cuio_common.hpp +++ b/cpp/benchmarks/io/cuio_common.hpp @@ -132,3 +132,13 @@ std::vector select_column_names(std::vector const& col * The segments could be Parquet row groups or ORC stripes. */ std::vector segments_in_chunk(int num_segments, int num_chunks, int chunk); + +/** + * @brief Drops L3 cache if `CUDF_BENCHMARK_DROP_CACHE` environment variable is set. + * + * Has no effect if the environment variable is not set. + * May require sudo access to run successfully. + * + * @throw cudf::logic_error if the environment variable is set and the command fails + */ +void try_drop_l3_cache(); diff --git a/cpp/benchmarks/io/orc/orc_reader.cpp b/cpp/benchmarks/io/orc/orc_reader.cpp index 0fc2238a272..fc76fbe7603 100644 --- a/cpp/benchmarks/io/orc/orc_reader.cpp +++ b/cpp/benchmarks/io/orc/orc_reader.cpp @@ -60,6 +60,7 @@ void BM_orc_read_varying_input(benchmark::State& state) auto mem_stats_logger = cudf::memory_stats_logger(); for (auto _ : state) { + try_drop_l3_cache(); cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 cudf_io::read_orc(read_opts); } @@ -117,6 +118,7 @@ void BM_orc_read_varying_options(benchmark::State& state) cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks; auto mem_stats_logger = cudf::memory_stats_logger(); for (auto _ : state) { + try_drop_l3_cache(); cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 cudf::size_type rows_read = 0; diff --git a/cpp/benchmarks/io/orc/orc_writer.cpp b/cpp/benchmarks/io/orc/orc_writer.cpp index 525c13af5c0..f61dac7677b 100644 --- a/cpp/benchmarks/io/orc/orc_writer.cpp +++ 
b/cpp/benchmarks/io/orc/orc_writer.cpp @@ -14,7 +14,6 @@ * limitations under the License. */ -#include "cudf/io/types.hpp" #include #include @@ -23,6 +22,7 @@ #include #include +#include // to enable, run cmake with -DBUILD_BENCHMARKS=ON diff --git a/cpp/benchmarks/io/parquet/parquet_reader.cpp b/cpp/benchmarks/io/parquet/parquet_reader.cpp index 8a97fd35c31..b20534e8ac0 100644 --- a/cpp/benchmarks/io/parquet/parquet_reader.cpp +++ b/cpp/benchmarks/io/parquet/parquet_reader.cpp @@ -60,6 +60,7 @@ void BM_parq_read_varying_input(benchmark::State& state) auto mem_stats_logger = cudf::memory_stats_logger(); for (auto _ : state) { + try_drop_l3_cache(); cuda_event_timer const raii(state, true); // flush_l2_cache = true, stream = 0 cudf_io::read_parquet(read_opts); } @@ -117,6 +118,7 @@ void BM_parq_read_varying_options(benchmark::State& state) cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks; auto mem_stats_logger = cudf::memory_stats_logger(); for (auto _ : state) { + try_drop_l3_cache(); cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 cudf::size_type rows_read = 0; diff --git a/cpp/benchmarks/io/text/multibyte_split.cpp b/cpp/benchmarks/io/text/multibyte_split.cpp index ada8856e8e5..af6c2c5e030 100644 --- a/cpp/benchmarks/io/text/multibyte_split.cpp +++ b/cpp/benchmarks/io/text/multibyte_split.cpp @@ -137,6 +137,7 @@ static void BM_multibyte_split(benchmark::State& state) auto mem_stats_logger = cudf::memory_stats_logger(); for (auto _ : state) { + try_drop_l3_cache(); cuda_event_timer raii(state, true); auto output = cudf::io::text::multibyte_split(*source, delim); } diff --git a/cpp/benchmarks/sort/rank.cpp b/cpp/benchmarks/sort/rank.cpp index 22acb241f0b..c3c77ebd52f 100644 --- a/cpp/benchmarks/sort/rank.cpp +++ b/cpp/benchmarks/sort/rank.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "cudf/column/column_view.hpp" +#include #include #include diff --git a/cpp/benchmarks/string/convert_durations.cpp b/cpp/benchmarks/string/convert_durations.cpp index dc9a1e991b2..8af111d9a63 100644 --- a/cpp/benchmarks/string/convert_durations.cpp +++ b/cpp/benchmarks/string/convert_durations.cpp @@ -13,25 +13,24 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - -#include - +#include #include #include +#include #include #include #include #include +#include + +#include +#include + #include #include -#include "../fixture/benchmark_fixture.hpp" -#include "../synchronization/synchronization.hpp" -#include "cudf/column/column_view.hpp" -#include "cudf/wrappers/durations.hpp" - class DurationsToString : public cudf::benchmark { }; template diff --git a/cpp/benchmarks/text/subword.cpp b/cpp/benchmarks/text/subword.cpp index 150f578a22a..b8311324f70 100644 --- a/cpp/benchmarks/text/subword.cpp +++ b/cpp/benchmarks/text/subword.cpp @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -29,7 +30,7 @@ static std::string create_hash_vocab_file() { - std::string dir_template("/tmp"); + std::string dir_template{std::filesystem::temp_directory_path().string()}; if (const char* env_p = std::getenv("WORKSPACE")) dir_template = env_p; std::string hash_file = dir_template + "/hash_vocab.txt"; // create a fake hashed vocab text file for this test diff --git a/cpp/cmake/thirdparty/get_cucollections.cmake b/cpp/cmake/thirdparty/get_cucollections.cmake index 1639655d1e9..5232821d113 100644 --- a/cpp/cmake/thirdparty/get_cucollections.cmake +++ b/cpp/cmake/thirdparty/get_cucollections.cmake @@ -21,12 +21,14 @@ function(find_and_configure_cucollections) cuco 0.0.1 GLOBAL_TARGETS cuco::cuco BUILD_EXPORT_SET cudf-exports - INSTALL_EXPORT_SET cudf-exports CPM_ARGS GITHUB_REPOSITORY NVIDIA/cuCollections GIT_TAG fb58a38701f1c24ecfe07d8f1f208bbe80930da5 EXCLUDE_FROM_ALL ${BUILD_SHARED_LIBS} OPTIONS 
"BUILD_TESTS OFF" "BUILD_BENCHMARKS OFF" "BUILD_EXAMPLES OFF" ) + if(NOT BUILD_SHARED_LIBS) + rapids_export_package(INSTALL cuco cudf-exports) + endif() endfunction() diff --git a/cpp/include/cudf/detail/reduction_functions.hpp b/cpp/include/cudf/detail/reduction_functions.hpp index 3a6113e66ce..317e4d0cf47 100644 --- a/cpp/include/cudf/detail/reduction_functions.hpp +++ b/cpp/include/cudf/detail/reduction_functions.hpp @@ -17,9 +17,9 @@ #pragma once #include +#include #include -#include "cudf/lists/lists_column_view.hpp" #include namespace cudf { diff --git a/cpp/include/cudf/io/types.hpp b/cpp/include/cudf/io/types.hpp index 7e4ab5b8d9d..23ed0153f3f 100644 --- a/cpp/include/cudf/io/types.hpp +++ b/cpp/include/cudf/io/types.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -244,6 +244,7 @@ class column_in_metadata { bool _use_int96_timestamp = false; // bool _output_as_binary = false; thrust::optional _decimal_precision; + thrust::optional _parquet_field_id; std::vector children; public: @@ -324,6 +325,18 @@ class column_in_metadata { return *this; } + /** + * @brief Set the parquet field id of this column. + * + * @param field_id The parquet field id to set + * @return this for chaining + */ + column_in_metadata& set_parquet_field_id(int32_t field_id) + { + _parquet_field_id = field_id; + return *this; + } + /** * @brief Get reference to a child of this column * @@ -379,6 +392,18 @@ class column_in_metadata { */ [[nodiscard]] uint8_t get_decimal_precision() const { return _decimal_precision.value(); } + /** + * @brief Get whether parquet field id has been set for this column. + */ + [[nodiscard]] bool is_parquet_field_id_set() const { return _parquet_field_id.has_value(); } + + /** + * @brief Get the parquet field id that was set for this column. 
+ * @throws If parquet field id was not set for this column. + * Check using `is_parquet_field_id_set()` first. + */ + [[nodiscard]] int32_t get_parquet_field_id() const { return _parquet_field_id.value(); } + /** * @brief Get the number of children of this column */ diff --git a/cpp/include/cudf_test/file_utilities.hpp b/cpp/include/cudf_test/file_utilities.hpp index 4df7b6a69c8..d722b836674 100644 --- a/cpp/include/cudf_test/file_utilities.hpp +++ b/cpp/include/cudf_test/file_utilities.hpp @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -34,17 +35,14 @@ class temp_directory { public: temp_directory(const std::string& base_name) { - std::string dir_template("/tmp"); - if (const char* env_p = std::getenv("WORKSPACE")) dir_template = env_p; + std::string dir_template{std::filesystem::temp_directory_path().string()}; + if (auto env_p = std::getenv("WORKSPACE")) dir_template = env_p; + dir_template += "/" + base_name + ".XXXXXX"; auto const tmpdirptr = mkdtemp(const_cast(dir_template.data())); - if (tmpdirptr == nullptr) CUDF_FAIL("Temporary directory creation failure: " + dir_template); - _path = dir_template + "/"; - } + CUDF_EXPECTS(tmpdirptr != nullptr, "Temporary directory creation failure: " + dir_template); - static int rm_files(const char* pathname, const struct stat* sbuf, int type, struct FTW* ftwb) - { - return std::remove(pathname); + _path = dir_template + "/"; } temp_directory& operator=(temp_directory const&) = delete; @@ -52,11 +50,7 @@ class temp_directory { temp_directory& operator=(temp_directory&&) = default; temp_directory(temp_directory&&) = default; - ~temp_directory() - { - // TODO: should use std::filesystem instead, once C++17 support added - nftw(_path.c_str(), rm_files, 10, FTW_DEPTH | FTW_MOUNT | FTW_PHYS); - } + ~temp_directory() { std::filesystem::remove_all(std::filesystem::path{_path}); } /** * @brief Returns the path of the temporary directory diff --git a/cpp/libcudf_kafka/src/kafka_callback.cpp 
b/cpp/libcudf_kafka/src/kafka_callback.cpp index 6b98747c145..79a40640627 100644 --- a/cpp/libcudf_kafka/src/kafka_callback.cpp +++ b/cpp/libcudf_kafka/src/kafka_callback.cpp @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "cudf_kafka/kafka_callback.hpp" +#include #include diff --git a/cpp/libcudf_kafka/src/kafka_consumer.cpp b/cpp/libcudf_kafka/src/kafka_consumer.cpp index 49e89a56e60..2ddaa9892da 100644 --- a/cpp/libcudf_kafka/src/kafka_consumer.cpp +++ b/cpp/libcudf_kafka/src/kafka_consumer.cpp @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "cudf_kafka/kafka_consumer.hpp" +#include #include diff --git a/cpp/src/io/parquet/compact_protocol_reader.cpp b/cpp/src/io/parquet/compact_protocol_reader.cpp index 7feaa8e61b4..a1fc2edb0bb 100644 --- a/cpp/src/io/parquet/compact_protocol_reader.cpp +++ b/cpp/src/io/parquet/compact_protocol_reader.cpp @@ -156,6 +156,7 @@ bool CompactProtocolReader::read(SchemaElement* s) ParquetFieldEnum(6, s->converted_type), ParquetFieldInt32(7, s->decimal_scale), ParquetFieldInt32(8, s->decimal_precision), + ParquetFieldOptionalInt32(9, s->field_id), ParquetFieldStruct(10, s->logical_type)); return function_builder(this, op); } diff --git a/cpp/src/io/parquet/compact_protocol_reader.hpp b/cpp/src/io/parquet/compact_protocol_reader.hpp index ba48f7b127f..ddca6c37e08 100644 --- a/cpp/src/io/parquet/compact_protocol_reader.hpp +++ b/cpp/src/io/parquet/compact_protocol_reader.hpp @@ -18,6 +18,8 @@ #include "parquet.hpp" +#include + #include #include #include @@ -137,6 +139,7 @@ class CompactProtocolReader { friend class ParquetFieldBool; friend class ParquetFieldInt8; friend class ParquetFieldInt32; + friend class ParquetFieldOptionalInt32; friend class ParquetFieldInt64; template friend class ParquetFieldStructListFunctor; @@ -216,6 +219,27 @@ class ParquetFieldInt32 { 
int field() { return field_val; } }; +/** + * @brief Functor to set value to optional 32 bit integer read from CompactProtocolReader + * + * @return True if field type is not int32 + */ +class ParquetFieldOptionalInt32 { + int field_val; + thrust::optional& val; + + public: + ParquetFieldOptionalInt32(int f, thrust::optional& v) : field_val(f), val(v) {} + + inline bool operator()(CompactProtocolReader* cpr, int field_type) + { + val = cpr->get_i32(); + return (field_type != ST_FLD_I32); + } + + int field() { return field_val; } +}; + /** * @brief Functor to set value to 64 bit integer read from CompactProtocolReader * diff --git a/cpp/src/io/parquet/compact_protocol_writer.cpp b/cpp/src/io/parquet/compact_protocol_writer.cpp index 927844cb1c2..176ecb6a572 100644 --- a/cpp/src/io/parquet/compact_protocol_writer.cpp +++ b/cpp/src/io/parquet/compact_protocol_writer.cpp @@ -144,6 +144,7 @@ size_t CompactProtocolWriter::write(const SchemaElement& s) c.field_int(8, s.decimal_precision); } } + if (s.field_id) { c.field_int(9, s.field_id.value()); } auto const isset = s.logical_type.isset; // TODO: add handling for all logical types // if (isset.STRING or isset.MAP or isset.LIST or isset.ENUM or isset.DECIMAL or isset.DATE or diff --git a/cpp/src/io/parquet/parquet.hpp b/cpp/src/io/parquet/parquet.hpp index b1800640c91..ccaf3485bdf 100644 --- a/cpp/src/io/parquet/parquet.hpp +++ b/cpp/src/io/parquet/parquet.hpp @@ -18,6 +18,8 @@ #include "parquet_common.hpp" +#include + #include #include #include @@ -145,6 +147,7 @@ struct SchemaElement { int32_t num_children = 0; int32_t decimal_scale = 0; int32_t decimal_precision = 0; + thrust::optional field_id = thrust::nullopt; // The following fields are filled in later during schema initialization int max_definition_level = 0; @@ -157,7 +160,8 @@ struct SchemaElement { return type == other.type && converted_type == other.converted_type && type_length == other.type_length && repetition_type == other.repetition_type && name == 
other.name && num_children == other.num_children && - decimal_scale == other.decimal_scale && decimal_precision == other.decimal_precision; + decimal_scale == other.decimal_scale && decimal_precision == other.decimal_precision && + field_id == other.field_id; } // the parquet format is a little squishy when it comes to interpreting diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index cb1acb4d9ec..4bc084c61d0 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -491,6 +491,13 @@ std::vector construct_schema_tree( [&](cudf::detail::LinkedColPtr const& col, column_in_metadata& col_meta, size_t parent_idx) { bool col_nullable = is_col_nullable(col, col_meta, single_write_mode); + auto set_field_id = [&schema, parent_idx](schema_tree_node& s, + column_in_metadata const& col_meta) { + if (schema[parent_idx].name != "list" and col_meta.is_parquet_field_id_set()) { + s.field_id = col_meta.get_parquet_field_id(); + } + }; + if (col->type().id() == type_id::STRUCT) { // if struct, add current and recursively call for all children schema_tree_node struct_schema{}; @@ -500,6 +507,7 @@ std::vector construct_schema_tree( struct_schema.name = (schema[parent_idx].name == "list") ? "element" : col_meta.get_name(); struct_schema.num_children = col->children.size(); struct_schema.parent_idx = parent_idx; + set_field_id(struct_schema, col_meta); schema.push_back(std::move(struct_schema)); auto struct_node_index = schema.size() - 1; @@ -524,6 +532,7 @@ std::vector construct_schema_tree( list_schema_1.name = (schema[parent_idx].name == "list") ? "element" : col_meta.get_name(); list_schema_1.num_children = 1; list_schema_1.parent_idx = parent_idx; + set_field_id(list_schema_1, col_meta); schema.push_back(std::move(list_schema_1)); schema_tree_node list_schema_2{}; @@ -555,7 +564,10 @@ std::vector construct_schema_tree( map_schema.converted_type = ConvertedType::MAP; map_schema.repetition_type = col_nullable ? 
FieldRepetitionType::OPTIONAL : FieldRepetitionType::REQUIRED; - map_schema.name = col_meta.get_name(); + map_schema.name = col_meta.get_name(); + if (col_meta.is_parquet_field_id_set()) { + map_schema.field_id = col_meta.get_parquet_field_id(); + } map_schema.num_children = 1; map_schema.parent_idx = parent_idx; schema.push_back(std::move(map_schema)); @@ -612,6 +624,7 @@ std::vector construct_schema_tree( col_schema.name = (schema[parent_idx].name == "list") ? "element" : col_meta.get_name(); col_schema.parent_idx = parent_idx; col_schema.leaf_column = col; + set_field_id(col_schema, col_meta); schema.push_back(col_schema); } }; diff --git a/cpp/src/merge/merge.cu b/cpp/src/merge/merge.cu index 01a94457b69..9c94a6220d6 100644 --- a/cpp/src/merge/merge.cu +++ b/cpp/src/merge/merge.cu @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -38,7 +39,6 @@ #include #include -#include "cudf/utilities/traits.hpp" #include #include diff --git a/cpp/src/structs/structs_column_view.cpp b/cpp/src/structs/structs_column_view.cpp index db9496f18be..681f13386ff 100644 --- a/cpp/src/structs/structs_column_view.cpp +++ b/cpp/src/structs/structs_column_view.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,9 +14,9 @@ * limitations under the License. 
*/ -#include "cudf/utilities/error.hpp" #include #include +#include namespace cudf { diff --git a/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp b/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp index 64462669f90..28df893aff1 100644 --- a/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp +++ b/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp @@ -20,13 +20,13 @@ #include #include #include +#include #include #include #include #include -#include "cudf/utilities/error.hpp" #include #include diff --git a/cpp/tests/hash_map/map_test.cu b/cpp/tests/hash_map/map_test.cu index d69aee57756..f42549514e6 100644 --- a/cpp/tests/hash_map/map_test.cu +++ b/cpp/tests/hash_map/map_test.cu @@ -23,12 +23,12 @@ #include #include +#include #include #include #include -#include "rmm/exec_policy.hpp" #include #include #include diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index cd0aab3caeb..3905df2b274 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -219,15 +219,21 @@ struct ParquetWriterTimestampTypeTest : public ParquetWriterTest { auto type() { return cudf::data_type{cudf::type_to_id()}; } }; +// Typed test fixture for all types +template +struct ParquetWriterSchemaTest : public ParquetWriterTest { + auto type() { return cudf::data_type{cudf::type_to_id()}; } +}; + // Declare typed test cases // TODO: Replace with `NumericTypes` when unsigned support is added. 
Issue #5352 using SupportedTypes = cudf::test::Types; TYPED_TEST_SUITE(ParquetWriterNumericTypeTest, SupportedTypes); -using SupportedChronoTypes = cudf::test::Concat; -TYPED_TEST_SUITE(ParquetWriterChronoTypeTest, SupportedChronoTypes); +TYPED_TEST_SUITE(ParquetWriterChronoTypeTest, cudf::test::ChronoTypes); using SupportedTimestampTypes = cudf::test::Types; TYPED_TEST_SUITE(ParquetWriterTimestampTypeTest, SupportedTimestampTypes); +TYPED_TEST_SUITE(ParquetWriterSchemaTest, cudf::test::AllTypes); // Base test fixture for chunked writer tests struct ParquetChunkedWriterTest : public cudf::test::BaseFixture { diff --git a/cpp/tests/iterator/value_iterator_test_strings.cu b/cpp/tests/iterator/value_iterator_test_strings.cu index 5bddbfbd4aa..9aa18eb844f 100644 --- a/cpp/tests/iterator/value_iterator_test_strings.cu +++ b/cpp/tests/iterator/value_iterator_test_strings.cu @@ -12,10 +12,12 @@ * or implied. See the License for the specific language governing permissions and limitations under * the License. */ -#include "cudf/detail/utilities/vector_factories.hpp" -#include "rmm/cuda_stream_view.hpp" -#include "rmm/device_uvector.hpp" -#include +#include "iterator_tests.cuh" + +#include + +#include +#include #include #include diff --git a/cpp/tests/partitioning/partition_test.cpp b/cpp/tests/partitioning/partition_test.cpp index 785af409c4c..014a19e93a9 100644 --- a/cpp/tests/partitioning/partition_test.cpp +++ b/cpp/tests/partitioning/partition_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,16 +13,16 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include -#include -#include #include #include #include #include #include -#include "cudf/sorting.hpp" +#include +#include +#include +#include template class PartitionTest : public cudf::test::BaseFixture { diff --git a/docs/cudf/source/api_docs/dataframe.rst b/docs/cudf/source/api_docs/dataframe.rst index 1d600acfef1..e0ef3cb2ff0 100644 --- a/docs/cudf/source/api_docs/dataframe.rst +++ b/docs/cudf/source/api_docs/dataframe.rst @@ -149,6 +149,7 @@ Computations / descriptive stats DataFrame.round DataFrame.skew DataFrame.sum + DataFrame.sum_of_squares DataFrame.std DataFrame.var DataFrame.nunique @@ -248,9 +249,11 @@ Serialization / IO / conversion DataFrame.to_dlpack DataFrame.to_parquet DataFrame.to_csv + DataFrame.to_cupy DataFrame.to_hdf DataFrame.to_dict DataFrame.to_json + DataFrame.to_numpy DataFrame.to_pandas DataFrame.to_feather DataFrame.to_records diff --git a/docs/cudf/source/api_docs/index_objects.rst b/docs/cudf/source/api_docs/index_objects.rst index 6f5affd0ecd..8e0e3bbd411 100644 --- a/docs/cudf/source/api_docs/index_objects.rst +++ b/docs/cudf/source/api_docs/index_objects.rst @@ -92,7 +92,9 @@ Conversion Index.astype Index.to_arrow + Index.to_cupy Index.to_list + Index.to_numpy Index.to_series Index.to_frame Index.to_pandas diff --git a/docs/cudf/source/api_docs/series.rst b/docs/cudf/source/api_docs/series.rst index 95aa71919e4..d7015c9348d 100644 --- a/docs/cudf/source/api_docs/series.rst +++ b/docs/cudf/source/api_docs/series.rst @@ -390,10 +390,12 @@ Serialization / IO / conversion :toctree: api/ Series.to_arrow + Series.to_cupy Series.to_dlpack Series.to_frame Series.to_hdf Series.to_json + Series.to_numpy Series.to_pandas Series.to_string Series.from_arrow diff --git a/docs/cudf/source/api_docs/string_handling.rst b/docs/cudf/source/api_docs/string_handling.rst index 3087bcaa826..8d4646c47a7 100644 --- a/docs/cudf/source/api_docs/string_handling.rst +++ b/docs/cudf/source/api_docs/string_handling.rst @@ -83,7 +83,6 @@ strings and apply 
several methods to it. These can be accessed like rsplit startswith strip - subword_tokenize swapcase title token_count diff --git a/docs/cudf/source/basics/basics.rst b/docs/cudf/source/basics/basics.rst index 60a65558033..9b8983fba49 100644 --- a/docs/cudf/source/basics/basics.rst +++ b/docs/cudf/source/basics/basics.rst @@ -15,36 +15,40 @@ The following table lists all of cudf types. For methods requiring dtype argumen .. rst-class:: special-table .. table:: - +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ - | Kind of Data | Data Type | Scalar | String Aliases | - +========================+==================+=====================================================================================+=============================================+ - | Integer | | np.int8_, np.int16_, np.int32_, np.int64_, np.uint8_, np.uint16_, | ``'int8'``, ``'int16'``, ``'int32'``, | - | | | np.uint32_, np.uint64_ | ``'int64'``, ``'uint8'``, ``'uint16'``, | - | | | | ``'uint32'``, ``'uint64'`` | - +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ - | Float | | np.float32_, np.float64_ | ``'float32'``, ``'float64'`` | - +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ - | Strings | | `str `_ | ``'string'``, ``'object'`` | - +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ - | Datetime | | np.datetime64_ | ``'datetime64[s]'``, ``'datetime64[ms]'``, | - | | | | ``'datetime64[us]'``, ``'datetime64[ns]'`` | - 
+------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ - | Timedelta | | np.timedelta64_ | ``'timedelta64[s]'``, ``'timedelta64[ms]'``,| - | (duration type) | | | ``'timedelta64[us]'``, ``'timedelta64[ns]'``| - +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ - | Categorical | CategoricalDtype | (none) | ``'category'`` | - +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ - | Boolean | | np.bool_ | ``'bool'`` | - +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ - | Decimal | Decimal32Dtype, | (none) | (none) | - | | Decimal64Dtype, | | | - | | Decimal128Dtype | | | - +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ + +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+ + | Kind of Data | Data Type | Scalar | String Aliases | + +=================+==================+==============================================================+==============================================+ + | Integer | | np.int8_, np.int16_, np.int32_, np.int64_, np.uint8_, | ``'int8'``, ``'int16'``, ``'int32'``, | + | | | np.uint16_, np.uint32_, np.uint64_ | ``'int64'``, ``'uint8'``, ``'uint16'``, | + | | | | ``'uint32'``, ``'uint64'`` | + 
+-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+ + | Float | | np.float32_, np.float64_ | ``'float32'``, ``'float64'`` | + +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+ + | Strings | | `str `_ | ``'string'``, ``'object'`` | + +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+ + | Datetime | | np.datetime64_ | ``'datetime64[s]'``, ``'datetime64[ms]'``, | + | | | | ``'datetime64[us]'``, ``'datetime64[ns]'`` | + +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+ + | Timedelta | | np.timedelta64_ | ``'timedelta64[s]'``, ``'timedelta64[ms]'``, | + | (duration type) | | | ``'timedelta64[us]'``, ``'timedelta64[ns]'`` | + +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+ + | Categorical | CategoricalDtype | (none) | ``'category'`` | + +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+ + | Boolean | | np.bool_ | ``'bool'`` | + +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+ + | Decimal | Decimal32Dtype, | (none) | (none) | + | | Decimal64Dtype, | | | + | | Decimal128Dtype | | | + +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+ + | Lists | ListDtype | list | ``'list'`` | + 
+-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+ + | Structs | StructDtype | dict | ``'struct'`` | + +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+ **Note: All dtypes above are Nullable** -.. _np.int8: -.. _np.int16: +.. _np.int8: +.. _np.int16: .. _np.int32: .. _np.int64: .. _np.uint8: diff --git a/docs/cudf/source/basics/internals.rst b/docs/cudf/source/basics/internals.rst index 60b63c6fab8..96ef40d51e6 100644 --- a/docs/cudf/source/basics/internals.rst +++ b/docs/cudf/source/basics/internals.rst @@ -54,7 +54,7 @@ As another example, the ``StringColumn`` backing the Series 2. No mask buffer as there are no nulls in the Series 3. Two children columns: - - A column of 8-bit characters + - A column of UTF-8 characters ``['d', 'o', 'y', 'o', 'u', h' ... '?']`` - A column of "offsets" to the characters column (in this case, ``[0, 2, 5, 9, 12, 19]``) @@ -172,7 +172,7 @@ Selecting columns by index: >>> ca.select_by_index(1) ColumnAccessor(OrderedColumnDict([('y', )]), multiindex=False, level_names=(None,)) >>> ca.select_by_index([0, 1]) - ColumnAccessor(OrderedColumnDict([('x', ), ('y', )]), multiindex=False, level_names=(None,)) + ColumnAccessor(OrderedColumnDict([('x', ), ('y', )]), multiindex=False, level_names=(None,)) >>> ca.select_by_index(slice(1, 3)) ColumnAccessor(OrderedColumnDict([('y', ), ('z', )]), multiindex=False, level_names=(None,)) diff --git a/docs/cudf/source/basics/io-gds-integration.rst b/docs/cudf/source/basics/io-gds-integration.rst index 71c114e9149..5ff07ac29c5 100644 --- a/docs/cudf/source/basics/io-gds-integration.rst +++ b/docs/cudf/source/basics/io-gds-integration.rst @@ -1,14 +1,14 @@ GPUDirect Storage Integration ============================= -Many IO APIs can use GPUDirect Storage (GDS) library to optimize IO operations. 
-GDS enables a direct data path for direct memory access (DMA) transfers between GPU memory and storage, which avoids a bounce buffer through the CPU. -GDS also has a compatibility mode that allows the library to fall back to copying through a CPU bounce buffer. +Many IO APIs can use GPUDirect Storage (GDS) library to optimize IO operations. +GDS enables a direct data path for direct memory access (DMA) transfers between GPU memory and storage, which avoids a bounce buffer through the CPU. +GDS also has a compatibility mode that allows the library to fall back to copying through a CPU bounce buffer. The SDK is available for download `here `_. GDS is also included in CUDA Toolkit 11.4 and higher. -Use of GPUDirect Storage in cuDF is enabled by default, but can be disabled through the environment variable ``LIBCUDF_CUFILE_POLICY``. -This variable also controls the GDS compatibility mode. +Use of GPUDirect Storage in cuDF is enabled by default, but can be disabled through the environment variable ``LIBCUDF_CUFILE_POLICY``. +This variable also controls the GDS compatibility mode. There are three valid values for the environment variable: @@ -20,17 +20,17 @@ If no value is set, behavior will be the same as the "GDS" option. This environment variable also affects how cuDF treats GDS errors. When ``LIBCUDF_CUFILE_POLICY`` is set to "GDS" and a GDS API call fails for any reason, cuDF falls back to the internal implementation with bounce buffers. -When ``LIBCUDF_CUFILE_POLICY`` is set to "ALWAYS" and a GDS API call fails for any reason (unlikely, given that the compatibility mode is on), +When ``LIBCUDF_CUFILE_POLICY`` is set to "ALWAYS" and a GDS API call fails for any reason (unlikely, given that the compatibility mode is on), cuDF throws an exception to propagate the error to the user. 
Operations that support the use of GPUDirect Storage: -- `read_avro` -- `read_parquet` -- `read_orc` -- `to_csv` -- `to_parquet` -- `to_orc` +- :py:func:`cudf.read_avro` +- :py:func:`cudf.read_parquet` +- :py:func:`cudf.read_orc` +- :py:meth:`cudf.DataFrame.to_csv` +- :py:meth:`cudf.DataFrame.to_parquet` +- :py:meth:`cudf.DataFrame.to_orc` Several parameters that can be used to tune the performance of GDS-enabled I/O are exposed through environment variables: diff --git a/docs/cudf/source/basics/io-nvcomp-integration.rst b/docs/cudf/source/basics/io-nvcomp-integration.rst index 521833e2afd..fc24e0c15f4 100644 --- a/docs/cudf/source/basics/io-nvcomp-integration.rst +++ b/docs/cudf/source/basics/io-nvcomp-integration.rst @@ -1,14 +1,14 @@ nvCOMP Integration ============================= -Some types of compression/decompression can be performed using either `nvCOMP library `_ or the internal implementation. +Some types of compression/decompression can be performed using either the `nvCOMP library `_ or the internal implementation. Which implementation is used by default depends on the data format and the compression type. Behavior can be influenced through environment variable ``LIBCUDF_NVCOMP_POLICY``. There are three valid values for the environment variable: -- "STABLE": Only enable the nvCOMP in places where it has been deemed stable for production use. +- "STABLE": Only enable the nvCOMP in places where it has been deemed stable for production use. - "ALWAYS": Enable all available uses of nvCOMP, including new, experimental combinations. - "OFF": Disable nvCOMP use whenever possible and use the internal implementations instead. 
diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index dbdf8e59e6a..d65b77ef74b 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -252,6 +252,7 @@ def process_class_docstrings(app, what, name, obj, options, lines): lines[:] = lines[:cut_index] +nitpick_ignore = [("py:class", "SeriesOrIndex"),] def setup(app): diff --git a/docs/cudf/source/user_guide/guide-to-udfs.ipynb b/docs/cudf/source/user_guide/guide-to-udfs.ipynb index 41bce8b865e..0d05ddb00b4 100644 --- a/docs/cudf/source/user_guide/guide-to-udfs.ipynb +++ b/docs/cudf/source/user_guide/guide-to-udfs.ipynb @@ -138,7 +138,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -148,7 +148,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -160,7 +160,7 @@ "dtype: int64" ] }, - "execution_count": 7, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -193,7 +193,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -205,7 +205,7 @@ "dtype: int64" ] }, - "execution_count": 9, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -218,7 +218,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -229,7 +229,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -241,7 +241,7 @@ "dtype: int64" ] }, - "execution_count": 11, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -260,7 +260,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -274,7 +274,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -286,7 +286,7 @@ "dtype: int64" ] }, - "execution_count": 
14, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -322,7 +322,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -331,7 +331,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -355,7 +355,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -373,7 +373,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -452,7 +452,7 @@ "4 979 982 1011 9790.0" ] }, - "execution_count": 19, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -497,7 +497,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -514,7 +514,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -569,7 +569,7 @@ "2 3 6" ] }, - "execution_count": 21, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -591,7 +591,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -603,7 +603,7 @@ "dtype: int64" ] }, - "execution_count": 22, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -621,7 +621,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -633,7 +633,7 @@ "dtype: object" ] }, - "execution_count": 23, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -658,7 +658,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -709,7 +709,7 @@ "2 3" ] }, - "execution_count": 24, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -728,7 +728,7 @@ }, { 
"cell_type": "code", - "execution_count": 25, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -740,7 +740,7 @@ "dtype: int64" ] }, - "execution_count": 25, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -758,7 +758,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -813,7 +813,7 @@ "2 3 1" ] }, - "execution_count": 26, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -836,7 +836,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -848,7 +848,7 @@ "dtype: int64" ] }, - "execution_count": 27, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -866,7 +866,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 24, "metadata": {}, "outputs": [ { @@ -921,7 +921,7 @@ "2 3 3.14" ] }, - "execution_count": 28, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -939,7 +939,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 25, "metadata": {}, "outputs": [ { @@ -951,7 +951,7 @@ "dtype: float64" ] }, - "execution_count": 29, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -982,7 +982,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 26, "metadata": {}, "outputs": [ { @@ -1033,7 +1033,7 @@ "2 5" ] }, - "execution_count": 30, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -1054,7 +1054,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 27, "metadata": {}, "outputs": [ { @@ -1066,7 +1066,7 @@ "dtype: float64" ] }, - "execution_count": 31, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -1084,7 +1084,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 28, "metadata": {}, "outputs": [ { @@ -1151,7 +1151,7 @@ "2 3 6 4 8 6" 
] }, - "execution_count": 32, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -1172,7 +1172,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 29, "metadata": {}, "outputs": [ { @@ -1184,7 +1184,7 @@ "dtype: float64" ] }, - "execution_count": 33, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -1212,7 +1212,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 30, "metadata": {}, "outputs": [], "source": [ @@ -1241,7 +1241,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 31, "metadata": {}, "outputs": [ { @@ -1312,7 +1312,7 @@ "2 3 6 4 8 6 9.0" ] }, - "execution_count": 35, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -1344,7 +1344,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 32, "metadata": {}, "outputs": [ { @@ -1417,7 +1417,7 @@ "4 979 982 1011" ] }, - "execution_count": 36, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -1443,7 +1443,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 33, "metadata": {}, "outputs": [ { @@ -1522,7 +1522,7 @@ "4 979 982 1011 1961.0" ] }, - "execution_count": 37, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } @@ -1555,7 +1555,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 34, "metadata": {}, "outputs": [ { @@ -1570,7 +1570,7 @@ "dtype: float64" ] }, - "execution_count": 38, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } @@ -1582,7 +1582,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 35, "metadata": {}, "outputs": [ { @@ -1591,7 +1591,7 @@ "Rolling [window=3,min_periods=3,center=False]" ] }, - "execution_count": 39, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -1610,7 +1610,7 @@ }, { "cell_type": "code", - "execution_count": 40, + 
"execution_count": 36, "metadata": {}, "outputs": [], "source": [ @@ -1634,7 +1634,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 37, "metadata": {}, "outputs": [ { @@ -1649,7 +1649,7 @@ "dtype: float64" ] }, - "execution_count": 41, + "execution_count": 37, "metadata": {}, "output_type": "execute_result" } @@ -1667,7 +1667,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 38, "metadata": {}, "outputs": [ { @@ -1734,7 +1734,7 @@ "4 59.0 59.0" ] }, - "execution_count": 42, + "execution_count": 38, "metadata": {}, "output_type": "execute_result" } @@ -1748,7 +1748,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 39, "metadata": {}, "outputs": [ { @@ -1845,7 +1845,7 @@ "9 100.0 100.0" ] }, - "execution_count": 43, + "execution_count": 39, "metadata": {}, "output_type": "execute_result" } @@ -1863,12 +1863,12 @@ "\n", "We can also apply UDFs to grouped DataFrames using `apply_grouped`. This example is also drawn and adapted from the RAPIDS [API documentation]().\n", "\n", - "First, we'll group our DataFrame based on column `b`, which is either True or False. Note that we currently need to pass `method=\"cudf\"` to use UDFs with GroupBy objects." + "First, we'll group our DataFrame based on column `b`, which is either True or False." 
] }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 40, "metadata": {}, "outputs": [ { @@ -1947,7 +1947,7 @@ "4 -0.970850 False Sarah 0.342905" ] }, - "execution_count": 44, + "execution_count": 40, "metadata": {}, "output_type": "execute_result" } @@ -1959,7 +1959,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 41, "metadata": {}, "outputs": [], "source": [ @@ -1975,7 +1975,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 42, "metadata": {}, "outputs": [], "source": [ @@ -2002,7 +2002,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 43, "metadata": {}, "outputs": [ { @@ -2132,7 +2132,7 @@ "9 -0.725581 True George 0.405245 0.271319" ] }, - "execution_count": 47, + "execution_count": 43, "metadata": {}, "output_type": "execute_result" } @@ -2162,7 +2162,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 44, "metadata": {}, "outputs": [ { @@ -2171,7 +2171,7 @@ "array([ 1., 2., 3., 4., 10.])" ] }, - "execution_count": 48, + "execution_count": 44, "metadata": {}, "output_type": "execute_result" } @@ -2193,7 +2193,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 45, "metadata": {}, "outputs": [ { @@ -2207,14 +2207,12 @@ "dtype: int32" ] }, - "execution_count": 49, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "from cudf.utils import cudautils\n", - "\n", "@cuda.jit\n", "def multiply_by_5(x, out):\n", " i = cuda.grid(1)\n", @@ -2235,7 +2233,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 46, "metadata": {}, "outputs": [ { @@ -2244,7 +2242,7 @@ "array([ 5., 10., 15., 20., 50.])" ] }, - "execution_count": 50, + "execution_count": 46, "metadata": {}, "output_type": "execute_result" } @@ -2307,7 +2305,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.13" + "version": "3.9.12" } }, 
"nbformat": 4, diff --git a/java/src/main/java/ai/rapids/cudf/ColumnWriterOptions.java b/java/src/main/java/ai/rapids/cudf/ColumnWriterOptions.java index 78b3d5d52ec..f3fb7de6abe 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnWriterOptions.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnWriterOptions.java @@ -33,9 +33,15 @@ public class ColumnWriterOptions { private boolean isNullable; private boolean isMap = false; private String columnName; + // only for Parquet + private boolean hasParquetFieldId; + private int parquetFieldId; + private ColumnWriterOptions(AbstractStructBuilder builder) { this.columnName = builder.name; this.isNullable = builder.isNullable; + this.hasParquetFieldId = builder.hasParquetFieldId; + this.parquetFieldId = builder.parquetFieldId; this.childColumnOptions = (ColumnWriterOptions[]) builder.children.toArray(new ColumnWriterOptions[0]); } @@ -67,6 +73,10 @@ public AbstractStructBuilder(String name, boolean isNullable) { super(name, isNullable); } + public AbstractStructBuilder(String name, boolean isNullable, int parquetFieldId) { + super(name, isNullable, parquetFieldId); + } + protected AbstractStructBuilder() { super(); } @@ -84,6 +94,9 @@ public static abstract class NestedBuilder children = new ArrayList<>(); protected boolean isNullable = true; protected String name = ""; + // Parquet structure needs + protected boolean hasParquetFieldId; + protected int parquetFieldId; /** * Builder specific to build a Struct meta @@ -93,22 +106,43 @@ protected NestedBuilder(String name, boolean isNullable) { this.isNullable = isNullable; } + protected NestedBuilder(String name, boolean isNullable, int parquetFieldId) { + this.name = name; + this.isNullable = isNullable; + this.hasParquetFieldId = true; + this.parquetFieldId = parquetFieldId; + } + protected NestedBuilder() {} - protected ColumnWriterOptions withColumns(String name, boolean isNullable) { + protected ColumnWriterOptions withColumn(String name, boolean isNullable) { return 
new ColumnWriterOptions(name, isNullable); } + protected ColumnWriterOptions withColumn(String name, boolean isNullable, int parquetFieldId) { + return new ColumnWriterOptions(name, isNullable, parquetFieldId); + } + protected ColumnWriterOptions withDecimal(String name, int precision, boolean isNullable) { return new ColumnWriterOptions(name, false, precision, isNullable); } + protected ColumnWriterOptions withDecimal(String name, int precision, + boolean isNullable, int parquetFieldId) { + return new ColumnWriterOptions(name, false, precision, isNullable, parquetFieldId); + } + protected ColumnWriterOptions withTimestamp(String name, boolean isInt96, boolean isNullable) { return new ColumnWriterOptions(name, isInt96, UNKNOWN_PRECISION, isNullable); } + protected ColumnWriterOptions withTimestamp(String name, boolean isInt96, + boolean isNullable, int parquetFieldId) { + return new ColumnWriterOptions(name, isInt96, UNKNOWN_PRECISION, isNullable, parquetFieldId); + } + /** * Set the list column meta. * Lists should have only one child in ColumnVector, but the metadata expects a @@ -155,16 +189,16 @@ public T withStructColumn(StructColumnWriterOptions child) { /** * Set column name */ - public T withNonNullableColumns(String... name) { - withColumns(false, name); + public T withNonNullableColumns(String... names) { + withColumns(false, names); return (T) this; } /** * Set nullable column meta data */ - public T withNullableColumns(String... name) { - withColumns(true, name); + public T withNullableColumns(String... names) { + withColumns(true, names); return (T) this; } @@ -172,13 +206,22 @@ public T withNullableColumns(String... name) { * Set a simple child meta data * @return this for chaining. */ - public T withColumns(boolean nullable, String... name) { - for (String n : name) { - children.add(withColumns(n, nullable)); + public T withColumns(boolean nullable, String... 
names) { + for (String n : names) { + children.add(withColumn(n, nullable)); } return (T) this; } + /** + * Set a simple child meta data + * @return this for chaining. + */ + public T withColumn(boolean nullable, String name, int parquetFieldId) { + children.add(withColumn(name, nullable, parquetFieldId)); + return (T) this; + } + /** * Set a Decimal child meta data * @return this for chaining. @@ -188,6 +231,15 @@ public T withDecimalColumn(String name, int precision, boolean nullable) { return (T) this; } + /** + * Set a Decimal child meta data + * @return this for chaining. + */ + public T withDecimalColumn(String name, int precision, boolean nullable, int parquetFieldId) { + children.add(withDecimal(name, precision, nullable, parquetFieldId)); + return (T) this; + } + /** * Set a Decimal child meta data * @return this for chaining. @@ -206,6 +258,15 @@ public T withDecimalColumn(String name, int precision) { return (T) this; } + /** + * Set a timestamp child meta data + * @return this for chaining. + */ + public T withTimestampColumn(String name, boolean isInt96, boolean nullable, int parquetFieldId) { + children.add(withTimestamp(name, isInt96, nullable, parquetFieldId)); + return (T) this; + } + /** * Set a timestamp child meta data * @return this for chaining. 
@@ -244,6 +305,13 @@ public ColumnWriterOptions(String columnName, boolean isTimestampTypeInt96, this.columnName = columnName; } + public ColumnWriterOptions(String columnName, boolean isTimestampTypeInt96, + int precision, boolean isNullable, int parquetFieldId) { + this(columnName, isTimestampTypeInt96, precision, isNullable); + this.hasParquetFieldId = true; + this.parquetFieldId = parquetFieldId; + } + public ColumnWriterOptions(String columnName, boolean isNullable) { this.isTimestampTypeInt96 = false; this.precision = UNKNOWN_PRECISION; @@ -251,6 +319,12 @@ public ColumnWriterOptions(String columnName, boolean isNullable) { this.columnName = columnName; } + public ColumnWriterOptions(String columnName, boolean isNullable, int parquetFieldId) { + this(columnName, isNullable); + this.hasParquetFieldId = true; + this.parquetFieldId = parquetFieldId; + } + public ColumnWriterOptions(String columnName) { this(columnName, true); } @@ -302,6 +376,24 @@ int[] getFlatPrecision() { } } + boolean[] getFlatHasParquetFieldId() { + boolean[] ret = {hasParquetFieldId}; + if (childColumnOptions.length > 0) { + return getFlatBooleans(ret, (opt) -> opt.getFlatHasParquetFieldId()); + } else { + return ret; + } + } + + int[] getFlatParquetFieldId() { + int[] ret = {parquetFieldId}; + if (childColumnOptions.length > 0) { + return getFlatInts(ret, (opt) -> opt.getFlatParquetFieldId()); + } else { + return ret; + } + } + boolean[] getFlatIsNullable() { boolean[] ret = {isNullable}; if (childColumnOptions.length > 0) { @@ -418,6 +510,13 @@ public static StructBuilder structBuilder(String name, boolean isNullable) { return new StructBuilder(name, isNullable); } + /** + * Creates a StructBuilder for column called 'name' + */ + public static StructBuilder structBuilder(String name, boolean isNullable, int parquetFieldId) { + return new StructBuilder(name, isNullable, parquetFieldId); + } + /** * Creates a StructBuilder for column called 'name' */ @@ -477,6 +576,10 @@ public 
StructBuilder(String name, boolean isNullable) { super(name, isNullable); } + public StructBuilder(String name, boolean isNullable, int parquetFieldId) { + super(name, isNullable, parquetFieldId); + } + public StructColumnWriterOptions build() { return new StructColumnWriterOptions(this); } diff --git a/java/src/main/java/ai/rapids/cudf/CompressionMetadataWriterOptions.java b/java/src/main/java/ai/rapids/cudf/CompressionMetadataWriterOptions.java index 9292975d0ce..3a3b7d721b7 100644 --- a/java/src/main/java/ai/rapids/cudf/CompressionMetadataWriterOptions.java +++ b/java/src/main/java/ai/rapids/cudf/CompressionMetadataWriterOptions.java @@ -41,6 +41,16 @@ int[] getFlatPrecision() { return super.getFlatInts(new int[]{}, (opt) -> opt.getFlatPrecision()); } + @Override + boolean[] getFlatHasParquetFieldId() { + return super.getFlatBooleans(new boolean[]{}, (opt) -> opt.getFlatHasParquetFieldId()); + } + + @Override + int[] getFlatParquetFieldId() { + return super.getFlatInts(new int[]{}, (opt) -> opt.getFlatParquetFieldId()); + } + @Override int[] getFlatNumChildren() { return super.getFlatInts(new int[]{}, (opt) -> opt.getFlatNumChildren()); diff --git a/java/src/main/java/ai/rapids/cudf/Table.java b/java/src/main/java/ai/rapids/cudf/Table.java index ff966643866..24f7d44ed28 100644 --- a/java/src/main/java/ai/rapids/cudf/Table.java +++ b/java/src/main/java/ai/rapids/cudf/Table.java @@ -289,7 +289,10 @@ private static native long writeParquetFileBegin(String[] columnNames, int statsFreq, boolean[] isInt96, int[] precisions, - boolean[] isMapValues, String filename) throws CudfException; + boolean[] isMapValues, + boolean[] hasParquetFieldIds, + int[] parquetFieldIds, + String filename) throws CudfException; /** * Setup everything to write parquet formatted data to a buffer. 
@@ -319,6 +322,8 @@ private static native long writeParquetBufferBegin(String[] columnNames, boolean[] isInt96, int[] precisions, boolean[] isMapValues, + boolean[] hasParquetFieldIds, + int[] parquetFieldIds, HostBufferConsumer consumer) throws CudfException; /** @@ -1201,6 +1206,8 @@ private ParquetTableWriter(ParquetWriterOptions options, File outputFile) { boolean[] timeInt96Values = options.getFlatIsTimeTypeInt96(); boolean[] isMapValues = options.getFlatIsMap(); int[] precisions = options.getFlatPrecision(); + boolean[] hasParquetFieldIds = options.getFlatHasParquetFieldId(); + int[] parquetFieldIds = options.getFlatParquetFieldId(); int[] flatNumChildren = options.getFlatNumChildren(); this.consumer = null; @@ -1215,6 +1222,8 @@ private ParquetTableWriter(ParquetWriterOptions options, File outputFile) { timeInt96Values, precisions, isMapValues, + hasParquetFieldIds, + parquetFieldIds, outputFile.getAbsolutePath()); } @@ -1224,6 +1233,8 @@ private ParquetTableWriter(ParquetWriterOptions options, HostBufferConsumer cons boolean[] timeInt96Values = options.getFlatIsTimeTypeInt96(); boolean[] isMapValues = options.getFlatIsMap(); int[] precisions = options.getFlatPrecision(); + boolean[] hasParquetFieldIds = options.getFlatHasParquetFieldId(); + int[] parquetFieldIds = options.getFlatParquetFieldId(); int[] flatNumChildren = options.getFlatNumChildren(); this.consumer = consumer; @@ -1238,6 +1249,8 @@ private ParquetTableWriter(ParquetWriterOptions options, HostBufferConsumer cons timeInt96Values, precisions, isMapValues, + hasParquetFieldIds, + parquetFieldIds, consumer); } diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index cebe476dd87..919958d4db2 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -673,6 +673,8 @@ int set_column_metadata(cudf::io::column_in_metadata &column_metadata, cudf::jni::native_jbooleanArray &is_int96, cudf::jni::native_jintArray &precisions, 
cudf::jni::native_jbooleanArray &is_map, + cudf::jni::native_jbooleanArray &hasParquetFieldIds, + cudf::jni::native_jintArray &parquetFieldIds, cudf::jni::native_jintArray &children, int num_children, int read_index) { int write_index = 0; for (int i = 0; i < num_children; i++, write_index++) { @@ -687,12 +689,15 @@ int set_column_metadata(cudf::io::column_in_metadata &column_metadata, if (is_map[read_index]) { child.set_list_column_as_map(); } + if (!parquetFieldIds.is_null() && hasParquetFieldIds[read_index]) { + child.set_parquet_field_id(parquetFieldIds[read_index]); + } column_metadata.add_child(child); int childs_children = children[read_index++]; if (childs_children > 0) { - read_index = - set_column_metadata(column_metadata.child(write_index), col_names, nullability, is_int96, - precisions, is_map, children, childs_children, read_index); + read_index = set_column_metadata(column_metadata.child(write_index), col_names, nullability, + is_int96, precisions, is_map, hasParquetFieldIds, + parquetFieldIds, children, childs_children, read_index); } } return read_index; @@ -701,12 +706,15 @@ int set_column_metadata(cudf::io::column_in_metadata &column_metadata, void createTableMetaData(JNIEnv *env, jint num_children, jobjectArray &j_col_names, jintArray &j_children, jbooleanArray &j_col_nullability, jbooleanArray &j_is_int96, jintArray &j_precisions, - jbooleanArray &j_is_map, cudf::io::table_input_metadata &metadata) { + jbooleanArray &j_is_map, cudf::io::table_input_metadata &metadata, + jbooleanArray &j_hasParquetFieldIds, jintArray &j_parquetFieldIds) { cudf::jni::auto_set_device(env); cudf::jni::native_jstringArray col_names(env, j_col_names); cudf::jni::native_jbooleanArray col_nullability(env, j_col_nullability); cudf::jni::native_jbooleanArray is_int96(env, j_is_int96); cudf::jni::native_jintArray precisions(env, j_precisions); + cudf::jni::native_jbooleanArray hasParquetFieldIds(env, j_hasParquetFieldIds); + cudf::jni::native_jintArray parquetFieldIds(env, 
j_parquetFieldIds); cudf::jni::native_jintArray children(env, j_children); cudf::jni::native_jbooleanArray is_map(env, j_is_map); @@ -729,11 +737,14 @@ void createTableMetaData(JNIEnv *env, jint num_children, jobjectArray &j_col_nam if (is_map[read_index]) { metadata.column_metadata[write_index].set_list_column_as_map(); } + if (!parquetFieldIds.is_null() && hasParquetFieldIds[read_index]) { + metadata.column_metadata[write_index].set_parquet_field_id(parquetFieldIds[read_index]); + } int childs_children = children[read_index++]; if (childs_children > 0) { - read_index = - set_column_metadata(metadata.column_metadata[write_index], cpp_names, col_nullability, - is_int96, precisions, is_map, children, childs_children, read_index); + read_index = set_column_metadata( + metadata.column_metadata[write_index], cpp_names, col_nullability, is_int96, precisions, + is_map, hasParquetFieldIds, parquetFieldIds, children, childs_children, read_index); } } } @@ -1539,7 +1550,8 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeParquetBufferBegin( JNIEnv *env, jclass, jobjectArray j_col_names, jint j_num_children, jintArray j_children, jbooleanArray j_col_nullability, jobjectArray j_metadata_keys, jobjectArray j_metadata_values, jint j_compression, jint j_stats_freq, jbooleanArray j_isInt96, jintArray j_precisions, - jbooleanArray j_is_map, jobject consumer) { + jbooleanArray j_is_map, jbooleanArray j_hasParquetFieldIds, jintArray j_parquetFieldIds, + jobject consumer) { JNI_NULL_CHECK(env, j_col_names, "null columns", 0); JNI_NULL_CHECK(env, j_col_nullability, "null nullability", 0); JNI_NULL_CHECK(env, j_metadata_keys, "null metadata keys", 0); @@ -1554,7 +1566,7 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeParquetBufferBegin( sink_info sink{data_sink.get()}; table_input_metadata metadata; createTableMetaData(env, j_num_children, j_col_names, j_children, j_col_nullability, j_isInt96, - j_precisions, j_is_map, metadata); + j_precisions, j_is_map, metadata, 
j_hasParquetFieldIds, j_parquetFieldIds); auto meta_keys = cudf::jni::native_jstringArray{env, j_metadata_keys}.as_cpp_vector(); auto meta_values = cudf::jni::native_jstringArray{env, j_metadata_values}.as_cpp_vector(); @@ -1583,7 +1595,8 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeParquetFileBegin( JNIEnv *env, jclass, jobjectArray j_col_names, jint j_num_children, jintArray j_children, jbooleanArray j_col_nullability, jobjectArray j_metadata_keys, jobjectArray j_metadata_values, jint j_compression, jint j_stats_freq, jbooleanArray j_isInt96, jintArray j_precisions, - jbooleanArray j_is_map, jstring j_output_path) { + jbooleanArray j_is_map, jbooleanArray j_hasParquetFieldIds, jintArray j_parquetFieldIds, + jstring j_output_path) { JNI_NULL_CHECK(env, j_col_names, "null columns", 0); JNI_NULL_CHECK(env, j_col_nullability, "null nullability", 0); JNI_NULL_CHECK(env, j_metadata_keys, "null metadata keys", 0); @@ -1596,7 +1609,7 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeParquetFileBegin( using namespace cudf::jni; table_input_metadata metadata; createTableMetaData(env, j_num_children, j_col_names, j_children, j_col_nullability, j_isInt96, - j_precisions, j_is_map, metadata); + j_precisions, j_is_map, metadata, j_hasParquetFieldIds, j_parquetFieldIds); auto meta_keys = cudf::jni::native_jstringArray{env, j_metadata_keys}.as_cpp_vector(); auto meta_values = cudf::jni::native_jstringArray{env, j_metadata_values}.as_cpp_vector(); @@ -1721,8 +1734,12 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeORCBufferBegin( table_input_metadata metadata; // ORC has no `j_is_int96`, but `createTableMetaData` needs a lvalue. jbooleanArray j_is_int96 = NULL; + // ORC has no `j_parquetFieldIds`, but `createTableMetaData` needs a lvalue. 
+ jbooleanArray j_hasParquetFieldIds = NULL; + jintArray j_parquetFieldIds = NULL; + createTableMetaData(env, j_num_children, j_col_names, j_children, j_col_nullability, j_is_int96, - j_precisions, j_is_map, metadata); + j_precisions, j_is_map, metadata, j_hasParquetFieldIds, j_parquetFieldIds); auto meta_keys = cudf::jni::native_jstringArray{env, j_metadata_keys}.as_cpp_vector(); auto meta_values = cudf::jni::native_jstringArray{env, j_metadata_values}.as_cpp_vector(); @@ -1766,8 +1783,11 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeORCFileBegin( table_input_metadata metadata; // ORC has no `j_is_int96`, but `createTableMetaData` needs a lvalue. jbooleanArray j_is_int96 = NULL; + // ORC has no `j_parquetFieldIds`, but `createTableMetaData` needs a lvalue. + jbooleanArray j_hasParquetFieldIds = NULL; + jintArray j_parquetFieldIds = NULL; createTableMetaData(env, j_num_children, j_col_names, j_children, j_col_nullability, j_is_int96, - j_precisions, j_is_map, metadata); + j_precisions, j_is_map, metadata, j_hasParquetFieldIds, j_parquetFieldIds); auto meta_keys = cudf::jni::native_jstringArray{env, j_metadata_keys}.as_cpp_vector(); auto meta_values = cudf::jni::native_jstringArray{env, j_metadata_values}.as_cpp_vector(); diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java index 7be1ca2118b..af28cfb6d6c 100644 --- a/java/src/test/java/ai/rapids/cudf/TableTest.java +++ b/java/src/test/java/ai/rapids/cudf/TableTest.java @@ -36,6 +36,7 @@ import org.apache.hadoop.fs.Path; import org.apache.parquet.hadoop.ParquetFileReader; import org.apache.parquet.hadoop.util.HadoopInputFile; +import org.apache.parquet.schema.GroupType; import org.apache.parquet.schema.MessageType; import org.apache.parquet.schema.OriginalType; import org.junit.jupiter.api.Test; @@ -7899,6 +7900,126 @@ void testParquetWriteToFileUncompressedNoStats() throws IOException { } } + @Test + void testParquetWriteWithFieldId() throws 
IOException { + // field IDs are: + // c1: -1, c2: 2, c3: 3, c31: 31, c32: 32, c4: -4, c5: not specified + ColumnWriterOptions.StructBuilder sBuilder = + structBuilder("c3", true, 3) + .withColumn(true, "c31", 31) + .withColumn(true, "c32", 32); + ParquetWriterOptions options = ParquetWriterOptions.builder() + .withColumn(true, "c1", -1) + .withDecimalColumn("c2", 9, true, 2) + .withStructColumn(sBuilder.build()) + .withTimestampColumn("c4", true, true, -4) + .withColumns( true, "c5") + .build(); + + File tempFile = File.createTempFile("test-field-id", ".parquet"); + try { + HostColumnVector.StructType structType = new HostColumnVector.StructType( + true, + new HostColumnVector.BasicType(true, DType.STRING), + new HostColumnVector.BasicType(true, DType.STRING)); + + try (Table table0 = new Table.TestBuilder() + .column(true, false) // c1 + .decimal32Column(0, 298, 2473) // c2 + .column(structType, // c3 + new HostColumnVector.StructData("a", "b"), new HostColumnVector.StructData("a", "b")) + .timestampMicrosecondsColumn(1000L, 2000L) // c4 + .column("a", "b") // c5 + .build()) { + try (TableWriter writer = Table.writeParquetChunked(options, tempFile.getAbsoluteFile())) { + writer.write(table0); + } + } + + try (ParquetFileReader reader = ParquetFileReader.open(HadoopInputFile.fromPath( + new Path(tempFile.getAbsolutePath()), + new Configuration()))) { + MessageType schema = reader.getFooter().getFileMetaData().getSchema(); + assert (schema.getFields().get(0).getId().intValue() == -1); + assert (schema.getFields().get(1).getId().intValue() == 2); + assert (schema.getFields().get(2).getId().intValue() == 3); + assert (((GroupType) schema.getFields().get(2)).getFields().get(0).getId().intValue() == 31); + assert (((GroupType) schema.getFields().get(2)).getFields().get(1).getId().intValue() == 32); + assert (schema.getFields().get(3).getId().intValue() == -4); + assert (schema.getFields().get(4).getId() == null); + } + } finally { + tempFile.delete(); + } + } + + @Test 
+ void testParquetWriteWithFieldIdNestNotSpecified() throws IOException { + // field IDs are: + // c0: no field ID + // c1: 1 + // c2: no field ID + // c21: 21 + // c22: no field ID + // c3: 3 + // c31: 31 + // c32: no field ID + // c4: 0 + ColumnWriterOptions.StructBuilder c2Builder = + structBuilder("c2", true) + .withColumn(true, "c21", 21) + .withColumns(true, "c22"); + ColumnWriterOptions.StructBuilder c3Builder = + structBuilder("c3", true, 3) + .withColumn(true, "c31", 31) + .withColumns(true, "c32"); + ParquetWriterOptions options = ParquetWriterOptions.builder() + .withColumns(true, "c0") + .withDecimalColumn("c1", 9, true, 1) + .withStructColumn(c2Builder.build()) + .withStructColumn(c3Builder.build()) + .withColumn(true, "c4", 0) + .build(); + + File tempFile = File.createTempFile("test-field-id", ".parquet"); + try { + HostColumnVector.StructType structType = new HostColumnVector.StructType( + true, + new HostColumnVector.BasicType(true, DType.STRING), + new HostColumnVector.BasicType(true, DType.STRING)); + + try (Table table0 = new Table.TestBuilder() + .column(true, false) // c0 + .decimal32Column(0, 298, 2473) // c1 + .column(structType, // c2 + new HostColumnVector.StructData("a", "b"), new HostColumnVector.StructData("a", "b")) + .column(structType, // c3 + new HostColumnVector.StructData("a", "b"), new HostColumnVector.StructData("a", "b")) + .column("a", "b") // c4 + .build()) { + try (TableWriter writer = Table.writeParquetChunked(options, tempFile.getAbsoluteFile())) { + writer.write(table0); + } + } + + try (ParquetFileReader reader = ParquetFileReader.open(HadoopInputFile.fromPath( + new Path(tempFile.getAbsolutePath()), + new Configuration()))) { + MessageType schema = reader.getFooter().getFileMetaData().getSchema(); + assert (schema.getFields().get(0).getId() == null); + assert (schema.getFields().get(1).getId().intValue() == 1); + assert (schema.getFields().get(2).getId() == null); + assert (((GroupType) 
schema.getFields().get(2)).getFields().get(0).getId().intValue() == 21); + assert (((GroupType) schema.getFields().get(2)).getFields().get(1).getId() == null); + assert (((GroupType) schema.getFields().get(3)).getFields().get(0).getId().intValue() == 31); + assert (((GroupType) schema.getFields().get(3)).getFields().get(1).getId() == null); + assert (schema.getFields().get(4).getId().intValue() == 0); + } + } finally { + tempFile.delete(); + } + } + /** Return a column where DECIMAL64 has been up-casted to DECIMAL128 */ private ColumnVector castDecimal64To128(ColumnView c) { DType dtype = c.getType(); diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py index 259a7f711c3..6fed6510484 100644 --- a/python/cudf/cudf/core/_base_index.py +++ b/python/cudf/cudf/core/_base_index.py @@ -118,7 +118,7 @@ def get_level_values(self, level): See Also -------- - cudf.core.multiindex.MultiIndex.get_level_values : Get values for + cudf.MultiIndex.get_level_values : Get values for a level of a MultiIndex. Notes diff --git a/python/cudf/cudf/core/cut.py b/python/cudf/cudf/core/cut.py index 7c585602c23..0fef6630248 100644 --- a/python/cudf/cudf/core/cut.py +++ b/python/cudf/cudf/core/cut.py @@ -1,3 +1,5 @@ +# Copyright (c) 2021-2022, NVIDIA CORPORATION. + from collections.abc import Sequence import cupy @@ -21,21 +23,27 @@ def cut( duplicates: str = "raise", ordered: bool = True, ): + """Bin values into discrete intervals. - """ - Bin values into discrete intervals. Use cut when you need to segment and sort data values into bins. This function is also useful for going from a continuous variable to a categorical variable. + Parameters ---------- x : array-like The input array to be binned. Must be 1-dimensional. bins : int, sequence of scalars, or IntervalIndex The criteria to bin by. - * int : Defines the number of equal-width bins in the - range of x. The range of x is extended by .1% on each - side to include the minimum and maximum values of x. 
+ + * int : Defines the number of equal-width bins in the range of `x`. The + range of `x` is extended by .1% on each side to include the minimum + and maximum values of `x`. + * sequence of scalars : Defines the bin edges allowing for non-uniform + width. No extension of the range of `x` is done. + * IntervalIndex : Defines the exact bins to be used. Note that + IntervalIndex for `bins` must be non-overlapping. + right : bool, default True Indicates whether bins includes the rightmost edge or not. labels : array or False, default None @@ -56,6 +64,7 @@ def cut( Categorical and Series (with Categorical dtype). If True, the resulting categorical will be ordered. If False, the resulting categorical will be unordered (labels must be provided). + Returns ------- out : CategoricalIndex @@ -66,30 +75,38 @@ def cut( For scalar or sequence bins, this is an ndarray with the computed bins. If set duplicates=drop, bins will drop non-unique bin. For an IntervalIndex bins, this is equal to bins. + Examples -------- Discretize into three equal-sized bins. + >>> cudf.cut(np.array([1, 7, 5, 4, 6, 3]), 3) CategoricalIndex([(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], - ... (5.0, 7.0],(0.994, 3.0]], categories=[(0.994, 3.0], - ... (3.0, 5.0], (5.0, 7.0]], ordered=True, dtype='category') + (5.0, 7.0], (0.994, 3.0]], categories=[(0.994, 3.0], + (3.0, 5.0], (5.0, 7.0]], ordered=True, dtype='category') + >>> cudf.cut(np.array([1, 7, 5, 4, 6, 3]), 3, retbins=True) (CategoricalIndex([(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], - ... (5.0, 7.0],(0.994, 3.0]],categories=[(0.994, 3.0], - ... (3.0, 5.0], (5.0, 7.0]],ordered=True, dtype='category'), - array([0.994, 3. , 5. , 7. ])) + (5.0, 7.0], (0.994, 3.0]], categories=[(0.994, 3.0], + (3.0, 5.0], (5.0, 7.0]], ordered=True, dtype='category'), + array([0.994, 3. , 5. , 7. ])) + >>> cudf.cut(np.array([1, 7, 5, 4, 6, 3]), - ... 3, labels=["bad", "medium", "good"]) + ... 
3, labels=["bad", "medium", "good"]) CategoricalIndex(['bad', 'good', 'medium', 'medium', 'good', 'bad'], - ... categories=['bad', 'medium', 'good'],ordered=True, - ... dtype='category') + categories=['bad', 'medium', 'good'],ordered=True, + dtype='category') + >>> cudf.cut(np.array([1, 7, 5, 4, 6, 3]), 3, - ... labels=["B", "A", "B"], ordered=False) + ... labels=["B", "A", "B"], ordered=False) CategoricalIndex(['B', 'B', 'A', 'A', 'B', 'B'], categories=['A', 'B'], - ... ordered=False, dtype='category') + ordered=False, dtype='category') + >>> cudf.cut([0, 1, 1, 2], bins=4, labels=False) array([0, 1, 1, 3], dtype=int32) + Passing a Series as an input returns a Series with categorical dtype: + >>> s = cudf.Series(np.array([2, 4, 6, 8, 10]), ... index=['a', 'b', 'c', 'd', 'e']) >>> cudf.cut(s, 3) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index ae60cd91fac..8893b85c97c 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -2660,11 +2660,6 @@ def diff(self, periods=1, axis=0): if axis != 0: raise NotImplementedError("Only axis=0 is supported.") - if not all(is_numeric_dtype(i) for i in self.dtypes): - raise NotImplementedError( - "DataFrame.diff only supports numeric dtypes" - ) - if abs(periods) > len(self): df = cudf.DataFrame._from_data( { diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 6b98e82d553..40f8eda0e4f 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -566,19 +566,20 @@ def mult(df): .. code-block:: >>> df = pd.DataFrame({ - 'a': [1, 1, 2, 2], - 'b': [1, 2, 1, 2], - 'c': [1, 2, 3, 4]}) + ... 'a': [1, 1, 2, 2], + ... 'b': [1, 2, 1, 2], + ... 'c': [1, 2, 3, 4], + ... 
}) >>> gdf = cudf.from_pandas(df) >>> df.groupby('a').apply(lambda x: x.iloc[[0]]) - a b c - a - 1 0 1 1 1 - 2 2 2 1 3 + a b c + a + 1 0 1 1 1 + 2 2 2 1 3 >>> gdf.groupby('a').apply(lambda x: x.iloc[[0]]) - a b c - 0 1 1 1 - 2 2 1 3 + a b c + 0 1 1 1 + 2 2 1 3 """ if not callable(function): raise TypeError(f"type {type(function)} is not callable") diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 10736948b57..ea722ec3968 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -991,6 +991,7 @@ def add_prefix(self, prefix): Examples -------- **Series** + >>> s = cudf.Series([1, 2, 3, 4]) >>> s 0 1 @@ -1006,6 +1007,7 @@ def add_prefix(self, prefix): dtype: int64 **DataFrame** + >>> df = cudf.DataFrame({'A': [1, 2, 3, 4], 'B': [3, 4, 5, 6]}) >>> df A B diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 6e15c03e6b4..20ba52afccd 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -14,11 +14,11 @@ import numpy as np import pandas as pd from pandas._config import get_option +from pandas.core.dtypes.common import is_float import cudf from cudf import _lib as libcudf from cudf._lib.scalar import _is_null_host_scalar -from cudf._lib.transform import bools_to_mask from cudf._typing import ColumnLike, DataFrameOrSeries, ScalarLike from cudf.api.types import ( _is_non_decimal_numeric_dtype, @@ -42,7 +42,6 @@ arange, as_column, column, - column_empty_like, full, ) from cudf.core.column.categorical import ( @@ -64,7 +63,7 @@ ) from cudf.core.single_column_frame import SingleColumnFrame from cudf.core.udf.scalar_function import _get_scalar_kernel -from cudf.utils import cudautils, docutils +from cudf.utils import docutils from cudf.utils.docutils import copy_docstring from cudf.utils.dtypes import ( can_convert_to_column, @@ -2969,19 +2968,22 @@ def digitize(self, bins, right=False): @_cudf_nvtx_annotate def diff(self, 
periods=1): - """Calculate the difference between values at positions i and i - N in - an array and store the output in a new array. + """First discrete difference of element. + + Calculates the difference of a Series element compared with another + element in the Series (default is element in previous row). + + Parameters + ---------- + periods : int, default 1 + Periods to shift for calculating difference, + accepts negative values. Returns ------- Series First differences of the Series. - Notes - ----- - Diff currently only supports float and integer dtype columns with - no null values. - Examples -------- >>> import cudf @@ -3028,32 +3030,12 @@ def diff(self, periods=1): 5 dtype: int64 """ - if self.has_nulls: - raise AssertionError( - "Diff currently requires columns with no null values" - ) - - if not np.issubdtype(self.dtype, np.number): - raise NotImplementedError( - "Diff currently only supports numeric dtypes" - ) - - # TODO: move this libcudf - input_col = self._column - output_col = column_empty_like(input_col) - output_mask = column_empty_like(input_col, dtype="bool") - if output_col.size > 0: - cudautils.gpu_diff.forall(output_col.size)( - input_col, output_col, output_mask, periods - ) - - output_col = column.build_column( - data=output_col.data, - dtype=output_col.dtype, - mask=bools_to_mask(output_mask), - ) + if not is_integer(periods): + if not (is_float(periods) and periods.is_integer()): + raise ValueError("periods must be an integer") + periods = int(periods) - return Series(output_col, name=self.name, index=self.index) + return self - self.shift(periods=periods) @copy_docstring(SeriesGroupBy) @_cudf_nvtx_annotate diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py index addc823e7f1..7fa66bd831d 100644 --- a/python/cudf/cudf/core/single_column_frame.py +++ b/python/cudf/cudf/core/single_column_frame.py @@ -81,8 +81,8 @@ def name(self, value): @property # type: ignore @_cudf_nvtx_annotate - 
def ndim(self): - """Get the dimensionality (always 1 for single-columned frames).""" + def ndim(self): # noqa: D401 + """Number of dimensions of the underlying data, by definition 1.""" return 1 @property # type: ignore diff --git a/python/cudf/cudf/core/tools/numeric.py b/python/cudf/cudf/core/tools/numeric.py index 7eea7cedaad..0273227010b 100644 --- a/python/cudf/cudf/core/tools/numeric.py +++ b/python/cudf/cudf/core/tools/numeric.py @@ -57,7 +57,7 @@ def to_numeric(arg, errors="raise", downcast=None): otherwise ndarray Notes - ------- + ----- An important difference from pandas is that this function does not accept mixed numeric/non-numeric type sequences. For example ``[1, 'a']``. A ``TypeError`` will be raised when such input is received, regardless of diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 13ab0b35822..07261534777 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -9098,7 +9098,7 @@ def test_groupby_cov_for_pandas_bug_case(): ], ) @pytest.mark.parametrize("periods", (-5, -1, 0, 1, 5)) -def test_diff_dataframe_numeric_dtypes(data, periods): +def test_diff_numeric_dtypes(data, periods): gdf = cudf.DataFrame(data) pdf = gdf.to_pandas() @@ -9137,7 +9137,7 @@ def test_diff_decimal_dtypes(precision, scale, dtype): ) -def test_diff_dataframe_invalid_axis(): +def test_diff_invalid_axis(): gdf = cudf.DataFrame(np.array([1.123, 2.343, 5.890, 0.0])) with pytest.raises(NotImplementedError, match="Only axis=0 is supported."): gdf.diff(periods=1, axis=1) @@ -9152,16 +9152,30 @@ def test_diff_dataframe_invalid_axis(): "string_col": ["a", "b", "c", "d", "e"], }, ["a", "b", "c", "d", "e"], - [np.nan, None, np.nan, None], ], ) -def test_diff_dataframe_non_numeric_dypes(data): +def test_diff_unsupported_dtypes(data): gdf = cudf.DataFrame(data) with pytest.raises( - NotImplementedError, - match="DataFrame.diff only supports numeric dtypes", + TypeError, + 
match=r"unsupported operand type\(s\)", ): - gdf.diff(periods=2, axis=0) + gdf.diff() + + +def test_diff_many_dtypes(): + pdf = pd.DataFrame( + { + "dates": pd.date_range("2020-01-01", "2020-01-06", freq="D"), + "bools": [True, True, True, False, True, True], + "floats": [1.0, 2.0, 3.5, np.nan, 5.0, -1.7], + "ints": [1, 2, 3, 3, 4, 5], + "nans_nulls": [np.nan, None, None, np.nan, np.nan, None], + } + ) + gdf = cudf.from_pandas(pdf) + assert_eq(pdf.diff(), gdf.diff()) + assert_eq(pdf.diff(periods=2), gdf.diff(periods=2)) def test_dataframe_assign_cp_np_array(): diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py index 6f0f77f0aa2..fccb9f680d9 100644 --- a/python/cudf/cudf/tests/test_series.py +++ b/python/cudf/cudf/tests/test_series.py @@ -18,6 +18,7 @@ TIMEDELTA_TYPES, assert_eq, assert_exceptions_equal, + gen_rand, ) @@ -1724,3 +1725,60 @@ def test_isin_categorical(data, values): got = gsr.isin(values) expected = psr.isin(values) assert_eq(got, expected) + + +@pytest.mark.parametrize("dtype", NUMERIC_TYPES) +@pytest.mark.parametrize("period", [-1, -5, -10, -20, 0, 1, 5, 10, 20]) +@pytest.mark.parametrize("data_empty", [False, True]) +def test_diff(dtype, period, data_empty): + if data_empty: + data = None + else: + if dtype == np.int8: + # to keep data in range + data = gen_rand(dtype, 100000, low=-2, high=2) + else: + data = gen_rand(dtype, 100000) + + gs = cudf.Series(data, dtype=dtype) + ps = pd.Series(data, dtype=dtype) + + expected_outcome = ps.diff(period) + diffed_outcome = gs.diff(period).astype(expected_outcome.dtype) + + if data_empty: + assert_eq(diffed_outcome, expected_outcome, check_index_type=False) + else: + assert_eq(diffed_outcome, expected_outcome) + + +@pytest.mark.parametrize( + "data", + [ + ["a", "b", "c", "d", "e"], + ], +) +def test_diff_unsupported_dtypes(data): + gs = cudf.Series(data) + with pytest.raises( + TypeError, + match=r"unsupported operand type\(s\)", + ): + gs.diff() + + 
+@pytest.mark.parametrize( + "data", + [ + pd.date_range("2020-01-01", "2020-01-06", freq="D"), + [True, True, True, False, True, True], + [1.0, 2.0, 3.5, 4.0, 5.0, -1.7], + [1, 2, 3, 3, 4, 5], + [np.nan, None, None, np.nan, np.nan, None], + ], +) +def test_diff_many_dtypes(data): + ps = pd.Series(data) + gs = cudf.from_pandas(ps) + assert_eq(ps.diff(), gs.diff()) + assert_eq(ps.diff(periods=2), gs.diff(periods=2)) diff --git a/python/cudf/cudf/utils/cudautils.py b/python/cudf/cudf/utils/cudautils.py index 4796402f14d..fb6e35f4f58 100755 --- a/python/cudf/cudf/utils/cudautils.py +++ b/python/cudf/cudf/utils/cudautils.py @@ -14,27 +14,6 @@ # -@cuda.jit -def gpu_diff(in_col, out_col, out_mask, N): - """Calculate the difference between values at positions i and i - N in an - array and store the output in a new array. - """ - i = cuda.grid(1) - - if N > 0: - if i < in_col.size: - out_col[i] = in_col[i] - in_col[i - N] - out_mask[i] = True - if i < N: - out_mask[i] = False - else: - if i <= (in_col.size + N): - out_col[i] = in_col[i] - in_col[i - N] - out_mask[i] = True - if i >= (in_col.size + N) and i < in_col.size: - out_mask[i] = False - - # Find segments