diff --git a/.clang-tidy b/.clang-tidy new file mode 100644 index 00000000000..043a93e6ff9 --- /dev/null +++ b/.clang-tidy @@ -0,0 +1,27 @@ +--- +Checks: + 'modernize-*, + -modernize-use-equals-default, + -modernize-concat-nested-namespaces, + -modernize-use-trailing-return-type' + + # -modernize-use-equals-default # auto-fix is broken (doesn't insert =default correctly) + # -modernize-concat-nested-namespaces # auto-fix is broken (can delete code) + # -modernize-use-trailing-return-type # just a preference + +WarningsAsErrors: '' +HeaderFilterRegex: '' +AnalyzeTemporaryDtors: false +FormatStyle: none +CheckOptions: + - key: modernize-loop-convert.MaxCopySize + value: '16' + - key: modernize-loop-convert.MinConfidence + value: reasonable + - key: modernize-pass-by-value.IncludeStyle + value: llvm + - key: modernize-replace-auto-ptr.IncludeStyle + value: llvm + - key: modernize-use-nullptr.NullMacros + value: 'NULL' +... diff --git a/CHANGELOG.md b/CHANGELOG.md index 68ff9abc9ea..6d4bdfb8d98 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +# cuDF 22.04.00 (Date TBD) + +Please see https://github.com/rapidsai/cudf/releases/tag/v22.04.00a for the latest changes to this development branch. + # cuDF 22.02.00 (Date TBD) Please see https://github.com/rapidsai/cudf/releases/tag/v22.02.00a for the latest changes to this development branch. diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index eef8a04067d..00000000000 --- a/Dockerfile +++ /dev/null @@ -1,76 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. 
- -# An integration test & dev container which builds and installs cuDF from main -ARG CUDA_VERSION=11.0 -ARG CUDA_SHORT_VERSION=${CUDA_VERSION} -ARG LINUX_VERSION=ubuntu18.04 -FROM nvidia/cuda:${CUDA_VERSION}-devel-${LINUX_VERSION} -ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/lib -ENV DEBIAN_FRONTEND=noninteractive - -ARG CC=9 -ARG CXX=9 -RUN apt update -y --fix-missing && \ - apt upgrade -y && \ - apt install -y --no-install-recommends software-properties-common && \ - add-apt-repository ppa:ubuntu-toolchain-r/test && \ - apt update -y --fix-missing && \ - apt install -y --no-install-recommends \ - git \ - gcc-${CC} \ - g++-${CXX} \ - tzdata && \ - apt-get autoremove -y && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -# Install conda -ADD https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh /miniconda.sh -RUN sh /miniconda.sh -b -p /conda && /conda/bin/conda update -n base conda -ENV PATH=${PATH}:/conda/bin -# Enables "source activate conda" -SHELL ["/bin/bash", "-c"] - -# Build cuDF conda env -ARG CUDA_SHORT_VERSION -ARG PYTHON_VERSION -ENV PYTHON_VERSION=$PYTHON_VERSION -ARG NUMBA_VERSION -ENV NUMBA_VERSION=$NUMBA_VERSION -ARG NUMPY_VERSION -ENV NUMPY_VERSION=$NUMPY_VERSION -ARG PANDAS_VERSION -ENV PANDAS_VERSION=$PANDAS_VERSION -ARG PYARROW_VERSION -ENV PYARROW_VERSION=$PYARROW_VERSION -ARG CYTHON_VERSION -ENV CYTHON_VERSION=$CYTHON_VERSION -ARG CMAKE_VERSION -ENV CMAKE_VERSION=$CMAKE_VERSION -ARG CUDF_REPO=https://github.com/rapidsai/cudf -ENV CUDF_REPO=$CUDF_REPO -ARG CUDF_BRANCH=main -ENV CUDF_BRANCH=$CUDF_BRANCH - -# Add everything from the local build context -ADD . 
/cudf/ - -# Checks if local build context has the source, if not clone it then run a bash script to modify -# the environment file based on versions set in build args -RUN ls -la /cudf -RUN if [ -f /cudf/docker/package_versions.sh ]; \ - then /cudf/docker/package_versions.sh /cudf/conda/environments/cudf_dev_cuda${CUDA_SHORT_VERSION}.yml && \ - conda env create --name cudf --file /cudf/conda/environments/cudf_dev_cuda${CUDA_SHORT_VERSION}.yml ; \ - else rm -rf /cudf && \ - git clone --recurse-submodules -b ${CUDF_BRANCH} ${CUDF_REPO} /cudf && \ - /cudf/docker/package_versions.sh /cudf/conda/environments/cudf_dev_cuda${CUDA_SHORT_VERSION}.yml && \ - conda env create --name cudf --file /cudf/conda/environments/cudf_dev_cuda${CUDA_SHORT_VERSION}.yml ; \ - fi - -ENV CC=/opts/conda/envs/rapids/bin/gcc-${CC} -ENV CXX=/opts/conda/envs/rapids/bin/g++-${CXX} - -# libcudf & cudf build/install -RUN source activate cudf && \ - cd /cudf/ && \ - ./build.sh libcudf cudf diff --git a/ci/benchmark/build.sh b/ci/benchmark/build.sh index 534ac19ee98..178bdab0154 100755 --- a/ci/benchmark/build.sh +++ b/ci/benchmark/build.sh @@ -37,7 +37,7 @@ export GBENCH_BENCHMARKS_DIR="$WORKSPACE/cpp/build/gbenchmarks/" export LIBCUDF_KERNEL_CACHE_PATH="$HOME/.jitify-cache" # Dask & Distributed git tag -export DASK_DISTRIBUTED_GIT_TAG='main' +export DASK_DISTRIBUTED_GIT_TAG='2022.01.0' function remove_libcudf_kernel_cache_dir { EXITCODE=$? 
diff --git a/ci/cpu/prebuild.sh b/ci/cpu/prebuild.sh index 746c0005a47..8a2c9d9be7c 100755 --- a/ci/cpu/prebuild.sh +++ b/ci/cpu/prebuild.sh @@ -4,12 +4,13 @@ set -e DEFAULT_CUDA_VER="11.5" +DEFAULT_PYTHON_VER="3.8" #Always upload cudf Python package export UPLOAD_CUDF=1 #Upload libcudf once per CUDA -if [[ "$PYTHON" == "3.7" ]]; then +if [[ "$PYTHON" == "${DEFAULT_PYTHON_VER}" ]]; then export UPLOAD_LIBCUDF=1 else export UPLOAD_LIBCUDF=0 @@ -23,7 +24,7 @@ else fi #We only want to upload libcudf_kafka once per python/CUDA combo -if [[ "$PYTHON" == "3.7" ]] && [[ "$CUDA" == "${DEFAULT_CUDA_VER}" ]]; then +if [[ "$PYTHON" == "${DEFAULT_PYTHON_VER}" ]] && [[ "$CUDA" == "${DEFAULT_CUDA_VER}" ]]; then export UPLOAD_LIBCUDF_KAFKA=1 else export UPLOAD_LIBCUDF_KAFKA=0 diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 39a39c46eff..d5fb7451769 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -31,10 +31,10 @@ export GIT_DESCRIBE_TAG=`git describe --tags` export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'` # Dask & Distributed git tag -export DASK_DISTRIBUTED_GIT_TAG='main' +export DASK_DISTRIBUTED_GIT_TAG='2022.01.0' # ucx-py version -export UCX_PY_VERSION='0.24.*' +export UCX_PY_VERSION='0.25.*' ################################################################################ # TRAP - Setup trap for removing jitify cache diff --git a/ci/gpu/java.sh b/ci/gpu/java.sh index b048470d155..ab5202fa9f7 100755 --- a/ci/gpu/java.sh +++ b/ci/gpu/java.sh @@ -31,7 +31,7 @@ export GIT_DESCRIBE_TAG=`git describe --tags` export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'` # ucx-py version -export UCX_PY_VERSION='0.24.*' +export UCX_PY_VERSION='0.25.*' ################################################################################ # TRAP - Setup trap for removing jitify cache diff --git a/conda/environments/cudf_dev_cuda11.5.yml b/conda/environments/cudf_dev_cuda11.5.yml index bbbc754e850..b9577d937d9 100644 --- 
a/conda/environments/cudf_dev_cuda11.5.yml +++ b/conda/environments/cudf_dev_cuda11.5.yml @@ -10,7 +10,7 @@ dependencies: - clang=11.1.0 - clang-tools=11.1.0 - cupy>=9.5.0,<11.0.0a0 - - rmm=22.02.* + - rmm=22.04.* - cmake>=3.20.1 - cmake_setuptools>=0.1.3 - python>=3.7,<3.9 @@ -33,6 +33,7 @@ dependencies: - ipython - pandoc=<2.0.0 - cudatoolkit=11.5 + - cuda-python >=11.5,<12.0 - pip - flake8=3.8.3 - black=19.10 @@ -41,8 +42,8 @@ dependencies: - pydocstyle=6.1.1 - typing_extensions - pre-commit - - dask>=2021.11.1 - - distributed>=2021.11.1 + - dask>=2021.11.1,<=2022.01.0 + - distributed>=2021.11.1,<=2022.01.0 - streamz - arrow-cpp=5.0.0 - dlpack>=0.5,<0.6.0a0 diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index a20749bc8c9..bd1412bc611 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -52,7 +52,7 @@ requirements: - packaging - cachetools - ptxcompiler # [linux64] # CUDA enhanced compatibility. See https://github.com/rapidsai/ptxcompiler - + - cuda-python >=11.5,<12.0 test: # [linux64] requires: # [linux64] - cudatoolkit {{ cuda_version }}.* # [linux64] diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml index acb56c464e4..56f2730db7a 100644 --- a/conda/recipes/cudf_kafka/meta.yaml +++ b/conda/recipes/cudf_kafka/meta.yaml @@ -35,7 +35,7 @@ requirements: run: - python - libcudf_kafka {{ version }} - - python-confluent-kafka >=1.7.0,<1.8.0a0=py{{ py_version_numeric }}* + - python-confluent-kafka >=1.7.0,<1.8.0a0 - cudf {{ version }} test: # [linux64] diff --git a/conda/recipes/custreamz/meta.yaml b/conda/recipes/custreamz/meta.yaml index 8bcdd1ec61e..2e8badc3a54 100644 --- a/conda/recipes/custreamz/meta.yaml +++ b/conda/recipes/custreamz/meta.yaml @@ -26,15 +26,15 @@ build: requirements: host: - python - - python-confluent-kafka >=1.7.0,<1.8.0a0=py{{ py_version_numeric }}* + - python-confluent-kafka >=1.7.0,<1.8.0a0 - cudf_kafka {{ version }} run: - python - streamz - 
cudf {{ version }} - - dask>=2021.11.1,<=2021.11.2 - - distributed>=2021.11.1,<=2021.11.2 - - python-confluent-kafka >=1.7.0,<1.8.0a0=py{{ py_version_numeric }}* + - dask>=2021.11.1,<=2022.01.0 + - distributed>=2021.11.1,<=2022.01.0 + - python-confluent-kafka >=1.7.0,<1.8.0a0 - cudf_kafka {{ version }} test: # [linux64] diff --git a/conda/recipes/dask-cudf/meta.yaml b/conda/recipes/dask-cudf/meta.yaml index fd34ff4112d..225d77729df 100644 --- a/conda/recipes/dask-cudf/meta.yaml +++ b/conda/recipes/dask-cudf/meta.yaml @@ -27,14 +27,14 @@ requirements: host: - python - cudf {{ version }} - - dask>=2021.11.1 - - distributed>=2021.11.1 + - dask>=2021.11.1,<=2022.01.0 + - distributed>=2021.11.1,<=2022.01.0 - cudatoolkit {{ cuda_version }} run: - python - cudf {{ version }} - - dask>=2021.11.1 - - distributed>=2021.11.1 + - dask>=2021.11.1,<=2022.01.0 + - distributed>=2021.11.1,<=2022.01.0 - {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }} test: # [linux64] diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 2f51f582e12..e4637408110 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2018-2021, NVIDIA CORPORATION. +# Copyright (c) 2018-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. 
You may obtain a copy of the License at @@ -25,7 +25,7 @@ rapids_cuda_init_architectures(CUDF) project( CUDF - VERSION 22.02.00 + VERSION 22.04.00 LANGUAGES C CXX CUDA ) @@ -186,6 +186,8 @@ add_library( src/binaryop/compiled/Mod.cu src/binaryop/compiled/Mul.cu src/binaryop/compiled/NullEquals.cu + src/binaryop/compiled/NullLogicalOr.cu + src/binaryop/compiled/NullLogicalAnd.cu src/binaryop/compiled/NullMax.cu src/binaryop/compiled/NullMin.cu src/binaryop/compiled/PMod.cu @@ -322,11 +324,16 @@ add_library( src/jit/parser.cpp src/jit/type.cpp src/join/conditional_join.cu - src/join/mixed_join.cu src/join/cross_join.cu src/join/hash_join.cu src/join/join.cu src/join/join_utils.cu + src/join/mixed_join.cu + src/join/mixed_join_kernels.cu + src/join/mixed_join_kernels_semi.cu + src/join/mixed_join_semi.cu + src/join/mixed_join_size_kernels.cu + src/join/mixed_join_size_kernels_semi.cu src/join/semi_join.cu src/lists/contains.cu src/lists/combine/concatenate_list_elements.cu diff --git a/cpp/benchmarks/common/generate_benchmark_input.cpp b/cpp/benchmarks/common/generate_benchmark_input.cpp index 995cea13c27..dcd8e32fc9d 100644 --- a/cpp/benchmarks/common/generate_benchmark_input.cpp +++ b/cpp/benchmarks/common/generate_benchmark_input.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -31,6 +31,7 @@ #include #include +#include #include #include #include diff --git a/cpp/benchmarks/common/generate_benchmark_input.hpp b/cpp/benchmarks/common/generate_benchmark_input.hpp index 3dbc6561839..893c8a61543 100644 --- a/cpp/benchmarks/common/generate_benchmark_input.hpp +++ b/cpp/benchmarks/common/generate_benchmark_input.hpp @@ -300,8 +300,8 @@ class data_profile { auto get_bool_probability() const { return bool_probability; } auto get_null_frequency() const { return null_frequency; }; - auto get_cardinality() const { return cardinality; }; - auto get_avg_run_length() const { return avg_run_length; }; + [[nodiscard]] auto get_cardinality() const { return cardinality; }; + [[nodiscard]] auto get_avg_run_length() const { return avg_run_length; }; // Users should pass integral values for bounds when setting the parameters for types that have // discrete distributions (integers, strings, lists). Otherwise the call with have no effect. diff --git a/cpp/benchmarks/copying/contiguous_split_benchmark.cu b/cpp/benchmarks/copying/contiguous_split_benchmark.cu index 55e1360efc8..bb6a9320c4a 100644 --- a/cpp/benchmarks/copying/contiguous_split_benchmark.cu +++ b/cpp/benchmarks/copying/contiguous_split_benchmark.cu @@ -51,10 +51,12 @@ void BM_contiguous_split_common(benchmark::State& state, std::vector> columns(src_cols.size()); std::transform(src_cols.begin(), src_cols.end(), columns.begin(), [](T& in) { auto ret = in.release(); - ret->null_count(); + // computing the null count is not a part of the benchmark's target code path, and we want the + // property to be pre-computed so that we measure the performance of only the intended code path + [[maybe_unused]] auto const nulls = ret->null_count(); return ret; }); - cudf::table src_table(std::move(columns)); + auto const src_table = cudf::table(std::move(columns)); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 diff --git 
a/cpp/benchmarks/copying/gather_benchmark.cu b/cpp/benchmarks/copying/gather_benchmark.cu index f075e9c486e..eaa201a0678 100644 --- a/cpp/benchmarks/copying/gather_benchmark.cu +++ b/cpp/benchmarks/copying/gather_benchmark.cu @@ -39,7 +39,7 @@ template void BM_gather(benchmark::State& state) { const cudf::size_type source_size{(cudf::size_type)state.range(0)}; - const cudf::size_type n_cols = (cudf::size_type)state.range(1); + const auto n_cols = (cudf::size_type)state.range(1); // Every element is valid auto data = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i; }); diff --git a/cpp/benchmarks/copying/scatter_benchmark.cu b/cpp/benchmarks/copying/scatter_benchmark.cu index 0c24dd50a13..a9ab376c8c3 100644 --- a/cpp/benchmarks/copying/scatter_benchmark.cu +++ b/cpp/benchmarks/copying/scatter_benchmark.cu @@ -40,7 +40,7 @@ template void BM_scatter(benchmark::State& state) { const cudf::size_type source_size{(cudf::size_type)state.range(0)}; - const cudf::size_type n_cols = (cudf::size_type)state.range(1); + const auto n_cols = (cudf::size_type)state.range(1); // Every element is valid auto data = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i; }); diff --git a/cpp/benchmarks/fixture/benchmark_fixture.hpp b/cpp/benchmarks/fixture/benchmark_fixture.hpp index 8476a137c12..ca3a748ccad 100644 --- a/cpp/benchmarks/fixture/benchmark_fixture.hpp +++ b/cpp/benchmarks/fixture/benchmark_fixture.hpp @@ -29,9 +29,11 @@ namespace { // memory resource factory helpers inline auto make_cuda() { return std::make_shared(); } -inline auto make_pool() +inline auto make_pool_instance() { - return rmm::mr::make_owning_wrapper(make_cuda()); + static rmm::mr::cuda_memory_resource cuda_mr; + static rmm::mr::pool_memory_resource pool_mr{&cuda_mr}; + return std::shared_ptr(&pool_mr); } } // namespace @@ -68,13 +70,19 @@ inline auto make_pool() */ class benchmark : public ::benchmark::Fixture { public: - virtual void SetUp(const 
::benchmark::State& state) + benchmark() : ::benchmark::Fixture() { - mr = make_pool(); + const char* env_iterations = std::getenv("CUDF_BENCHMARK_ITERATIONS"); + if (env_iterations != nullptr) { this->Iterations(std::max(0L, atol(env_iterations))); } + } + + void SetUp(const ::benchmark::State& state) override + { + mr = make_pool_instance(); rmm::mr::set_current_device_resource(mr.get()); // set default resource to pool } - virtual void TearDown(const ::benchmark::State& state) + void TearDown(const ::benchmark::State& state) override { // reset default resource to the initial resource rmm::mr::set_current_device_resource(nullptr); @@ -82,8 +90,8 @@ class benchmark : public ::benchmark::Fixture { } // eliminate partial override warnings (see benchmark/benchmark.h) - virtual void SetUp(::benchmark::State& st) { SetUp(const_cast(st)); } - virtual void TearDown(::benchmark::State& st) + void SetUp(::benchmark::State& st) override { SetUp(const_cast(st)); } + void TearDown(::benchmark::State& st) override { TearDown(const_cast(st)); } @@ -102,7 +110,10 @@ class memory_stats_logger { ~memory_stats_logger() { rmm::mr::set_current_device_resource(existing_mr); } - size_t peak_memory_usage() const noexcept { return statistics_mr.get_bytes_counter().peak; } + [[nodiscard]] size_t peak_memory_usage() const noexcept + { + return statistics_mr.get_bytes_counter().peak; + } private: rmm::mr::device_memory_resource* existing_mr; diff --git a/cpp/benchmarks/io/csv/csv_reader_benchmark.cpp b/cpp/benchmarks/io/csv/csv_reader_benchmark.cpp index 77bf4b03a14..7de10f9f4c1 100644 --- a/cpp/benchmarks/io/csv/csv_reader_benchmark.cpp +++ b/cpp/benchmarks/io/csv/csv_reader_benchmark.cpp @@ -35,8 +35,8 @@ class CsvRead : public cudf::benchmark { void BM_csv_read_varying_input(benchmark::State& state) { - auto const data_types = get_type_or_group(state.range(0)); - io_type const source_type = static_cast(state.range(1)); + auto const data_types = get_type_or_group(state.range(0)); + auto 
const source_type = static_cast(state.range(1)); auto const tbl = create_random_table(data_types, num_cols, table_size_bytes{data_size}); auto const view = tbl->view(); diff --git a/cpp/benchmarks/io/csv/csv_writer_benchmark.cpp b/cpp/benchmarks/io/csv/csv_writer_benchmark.cpp index 9baab6b2571..1e757da6f33 100644 --- a/cpp/benchmarks/io/csv/csv_writer_benchmark.cpp +++ b/cpp/benchmarks/io/csv/csv_writer_benchmark.cpp @@ -35,8 +35,8 @@ class CsvWrite : public cudf::benchmark { void BM_csv_write_varying_inout(benchmark::State& state) { - auto const data_types = get_type_or_group(state.range(0)); - io_type const sink_type = static_cast(state.range(1)); + auto const data_types = get_type_or_group(state.range(0)); + auto const sink_type = static_cast(state.range(1)); auto const tbl = create_random_table(data_types, num_cols, table_size_bytes{data_size}); auto const view = tbl->view(); diff --git a/cpp/benchmarks/io/orc/orc_reader_benchmark.cpp b/cpp/benchmarks/io/orc/orc_reader_benchmark.cpp index 6ab8d8d09c0..0c54136226a 100644 --- a/cpp/benchmarks/io/orc/orc_reader_benchmark.cpp +++ b/cpp/benchmarks/io/orc/orc_reader_benchmark.cpp @@ -40,7 +40,7 @@ void BM_orc_read_varying_input(benchmark::State& state) cudf::size_type const run_length = state.range(2); cudf_io::compression_type const compression = state.range(3) ? cudf_io::compression_type::SNAPPY : cudf_io::compression_type::NONE; - io_type const source_type = static_cast(state.range(4)); + auto const source_type = static_cast(state.range(4)); data_profile table_data_profile; table_data_profile.set_cardinality(cardinality); diff --git a/cpp/benchmarks/io/orc/orc_writer_benchmark.cpp b/cpp/benchmarks/io/orc/orc_writer_benchmark.cpp index 933b3d02e08..b0eba17359f 100644 --- a/cpp/benchmarks/io/orc/orc_writer_benchmark.cpp +++ b/cpp/benchmarks/io/orc/orc_writer_benchmark.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "cudf/io/types.hpp" #include #include @@ -40,7 +41,7 @@ void BM_orc_write_varying_inout(benchmark::State& state) cudf::size_type const run_length = state.range(2); cudf_io::compression_type const compression = state.range(3) ? cudf_io::compression_type::SNAPPY : cudf_io::compression_type::NONE; - io_type const sink_type = static_cast(state.range(4)); + auto const sink_type = static_cast(state.range(4)); data_profile table_data_profile; table_data_profile.set_cardinality(cardinality); @@ -65,8 +66,14 @@ void BM_orc_write_varying_inout(benchmark::State& state) void BM_orc_write_varying_options(benchmark::State& state) { - auto const compression = static_cast(state.range(0)); - auto const enable_stats = state.range(1) != 0; + auto const compression = static_cast(state.range(0)); + auto const stats_freq = [&] { + switch (state.range(2)) { + case 0: return cudf::io::STATISTICS_NONE; + case 1: return cudf::io::ORC_STATISTICS_STRIPE; + default: return cudf::io::ORC_STATISTICS_ROW_GROUP; + } + }(); auto const data_types = get_type_or_group({int32_t(type_group_id::INTEGRAL_SIGNED), int32_t(type_group_id::FLOATING_POINT), @@ -85,7 +92,7 @@ void BM_orc_write_varying_options(benchmark::State& state) cudf_io::orc_writer_options const options = cudf_io::orc_writer_options::builder(source_sink.make_sink_info(), view) .compression(compression) - .enable_statistics(enable_stats); + .enable_statistics(stats_freq); cudf_io::write_orc(options); } @@ -113,6 +120,8 @@ BENCHMARK_DEFINE_F(OrcWrite, writer_options) BENCHMARK_REGISTER_F(OrcWrite, writer_options) ->ArgsProduct({{int32_t(cudf::io::compression_type::NONE), int32_t(cudf::io::compression_type::SNAPPY)}, - {0, 1}}) + {int32_t{cudf::io::STATISTICS_NONE}, + int32_t{cudf::io::ORC_STATISTICS_STRIPE}, + 
int32_t{cudf::io::ORC_STATISTICS_ROW_GROUP}}}) ->Unit(benchmark::kMillisecond) ->UseManualTime(); diff --git a/cpp/benchmarks/io/parquet/parquet_reader_benchmark.cpp b/cpp/benchmarks/io/parquet/parquet_reader_benchmark.cpp index 888102c03be..d9e37d84036 100644 --- a/cpp/benchmarks/io/parquet/parquet_reader_benchmark.cpp +++ b/cpp/benchmarks/io/parquet/parquet_reader_benchmark.cpp @@ -40,7 +40,7 @@ void BM_parq_read_varying_input(benchmark::State& state) cudf::size_type const run_length = state.range(2); cudf_io::compression_type const compression = state.range(3) ? cudf_io::compression_type::SNAPPY : cudf_io::compression_type::NONE; - io_type const source_type = static_cast(state.range(4)); + auto const source_type = static_cast(state.range(4)); data_profile table_data_profile; table_data_profile.set_cardinality(cardinality); diff --git a/cpp/benchmarks/io/parquet/parquet_writer_benchmark.cpp b/cpp/benchmarks/io/parquet/parquet_writer_benchmark.cpp index 1af7e206692..74289fd414a 100644 --- a/cpp/benchmarks/io/parquet/parquet_writer_benchmark.cpp +++ b/cpp/benchmarks/io/parquet/parquet_writer_benchmark.cpp @@ -40,7 +40,7 @@ void BM_parq_write_varying_inout(benchmark::State& state) cudf::size_type const run_length = state.range(2); cudf_io::compression_type const compression = state.range(3) ? 
cudf_io::compression_type::SNAPPY : cudf_io::compression_type::NONE; - io_type const sink_type = static_cast(state.range(4)); + auto const sink_type = static_cast(state.range(4)); data_profile table_data_profile; table_data_profile.set_cardinality(cardinality); diff --git a/cpp/benchmarks/lists/copying/scatter_lists_benchmark.cu b/cpp/benchmarks/lists/copying/scatter_lists_benchmark.cu index 49007fda7a3..22e4be9ce9d 100644 --- a/cpp/benchmarks/lists/copying/scatter_lists_benchmark.cu +++ b/cpp/benchmarks/lists/copying/scatter_lists_benchmark.cu @@ -45,7 +45,7 @@ void BM_lists_scatter(::benchmark::State& state) const size_type base_size{(size_type)state.range(0)}; const size_type num_elements_per_row{(size_type)state.range(1)}; - const size_type num_rows = (size_type)ceil(double(base_size) / num_elements_per_row); + const auto num_rows = (size_type)ceil(double(base_size) / num_elements_per_row); auto source_base_col = make_fixed_width_column( data_type{type_to_id()}, base_size, mask_state::UNALLOCATED, stream, mr); diff --git a/cpp/benchmarks/type_dispatcher/type_dispatcher_benchmark.cu b/cpp/benchmarks/type_dispatcher/type_dispatcher_benchmark.cu index 8e51bcca63d..90097889a86 100644 --- a/cpp/benchmarks/type_dispatcher/type_dispatcher_benchmark.cu +++ b/cpp/benchmarks/type_dispatcher/type_dispatcher_benchmark.cu @@ -170,11 +170,11 @@ void launch_kernel(cudf::mutable_table_view input, T** d_ptr, int work_per_threa template void type_dispatcher_benchmark(::benchmark::State& state) { - const cudf::size_type source_size = static_cast(state.range(1)); + const auto source_size = static_cast(state.range(1)); - const cudf::size_type n_cols = static_cast(state.range(0)); + const auto n_cols = static_cast(state.range(0)); - const cudf::size_type work_per_thread = static_cast(state.range(2)); + const auto work_per_thread = static_cast(state.range(2)); auto data = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i; }); diff --git 
a/cpp/cmake/libcudacxx.patch b/cpp/cmake/libcudacxx.patch deleted file mode 100644 index 3cdc40ef084..00000000000 --- a/cpp/cmake/libcudacxx.patch +++ /dev/null @@ -1,21 +0,0 @@ -diff --git a/include/cuda/std/detail/__config b/include/cuda/std/detail/__config -index d55a43688..654142d7e 100644 ---- a/include/cuda/std/detail/__config -+++ b/include/cuda/std/detail/__config -@@ -23,7 +23,7 @@ - #define _LIBCUDACXX_CUDACC_VER_MINOR __CUDACC_VER_MINOR__ - #define _LIBCUDACXX_CUDACC_VER_BUILD __CUDACC_VER_BUILD__ - #define _LIBCUDACXX_CUDACC_VER \ -- _LIBCUDACXX_CUDACC_VER_MAJOR * 10000 + _LIBCUDACXX_CUDACC_VER_MINOR * 100 + \ -+ _LIBCUDACXX_CUDACC_VER_MAJOR * 100000 + _LIBCUDACXX_CUDACC_VER_MINOR * 1000 + \ - _LIBCUDACXX_CUDACC_VER_BUILD - - #define _LIBCUDACXX_HAS_NO_LONG_DOUBLE -@@ -64,7 +64,7 @@ - # endif - #endif - --#if defined(_LIBCUDACXX_COMPILER_MSVC) || (defined(_LIBCUDACXX_CUDACC_VER) && (_LIBCUDACXX_CUDACC_VER < 110500)) -+#if defined(_LIBCUDACXX_COMPILER_MSVC) || (defined(_LIBCUDACXX_CUDACC_VER) && (_LIBCUDACXX_CUDACC_VER < 1105000)) - # define _LIBCUDACXX_HAS_NO_INT128 - #endif diff --git a/cpp/cmake/thirdparty/get_cucollections.cmake b/cpp/cmake/thirdparty/get_cucollections.cmake index 16e7a58b020..c964c85156c 100644 --- a/cpp/cmake/thirdparty/get_cucollections.cmake +++ b/cpp/cmake/thirdparty/get_cucollections.cmake @@ -21,7 +21,7 @@ function(find_and_configure_cucollections) cuco 0.0 GLOBAL_TARGETS cuco::cuco CPM_ARGS GITHUB_REPOSITORY NVIDIA/cuCollections - GIT_TAG 193de1aa74f5721717f991ca757dc610c852bb17 + GIT_TAG 0ca860b824f5dc22cf8a41f09912e62e11f07d82 OPTIONS "BUILD_TESTS OFF" "BUILD_BENCHMARKS OFF" "BUILD_EXAMPLES OFF" ) diff --git a/cpp/cmake/thirdparty/get_libcudacxx.cmake b/cpp/cmake/thirdparty/get_libcudacxx.cmake index 0917adcd764..4b2917bc11e 100644 --- a/cpp/cmake/thirdparty/get_libcudacxx.cmake +++ b/cpp/cmake/thirdparty/get_libcudacxx.cmake @@ -16,11 +16,7 @@ function(find_and_configure_libcudacxx) 
include(${rapids-cmake-dir}/cpm/libcudacxx.cmake) - rapids_cpm_libcudacxx( - BUILD_EXPORT_SET cudf-exports - INSTALL_EXPORT_SET cudf-exports PATCH_COMMAND patch --reject-file=- -p1 -N < - ${CUDF_SOURCE_DIR}/cmake/libcudacxx.patch || true - ) + rapids_cpm_libcudacxx(BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-exports) set(LIBCUDACXX_INCLUDE_DIR "${libcudacxx_SOURCE_DIR}/include" diff --git a/cpp/docs/BENCHMARKING.md b/cpp/docs/BENCHMARKING.md index ed99ff5f1be..a0df9917064 100644 --- a/cpp/docs/BENCHMARKING.md +++ b/cpp/docs/BENCHMARKING.md @@ -1,19 +1,25 @@ # Unit Benchmarking in libcudf -Unit benchmarks in libcudf are written using [Google Benchmark](https://github.com/google/benchmark). +Unit benchmarks in libcudf are written using [NVBench](https://github.com/NVIDIA/nvbench). +While many existing benchmarks are written using +[Google Benchmark](https://github.com/google/benchmark), new benchmarks should use NVBench. -Google Benchmark provides many options for specifying ranges of parameters to benchmarks to test -with varying parameters, as well as to control the time unit reported, among other options. Refer to -other benchmarks in `cpp/benchmarks` to understand the options. +The NVBench library is similar to Google Benchmark, but has several quality of life improvements +when doing GPU benchmarking such as displaying the fraction of peak memory bandwidth achieved and +details about the GPU hardware. + +Both NVBench and Google Benchmark provide many options for specifying ranges of parameters to +benchmark, as well as to control the time unit reported, among other options. Refer to existing +benchmarks in `cpp/benchmarks` to understand the options. ## Directory and File Naming The naming of unit benchmark directories and source files should be consistent with the feature being benchmarked. For example, the benchmarks for APIs in `copying.hpp` should live in -`cudf/cpp/benchmarks/copying`. 
Each feature (or set of related features) should have its own +`cpp/benchmarks/copying`. Each feature (or set of related features) should have its own benchmark source file named `_benchmark.cu/cpp`. For example, -`cudf/cpp/src/copying/scatter.cu` has benchmarks in -`cudf/cpp/benchmarks/copying/scatter_benchmark.cu`. +`cpp/src/copying/scatter.cu` has benchmarks in +`cpp/benchmarks/copying/scatter_benchmark.cu`. In the interest of improving compile time, whenever possible, test source files should be `.cpp` files because `nvcc` is slower than `gcc` in compiling host code. Note that `thrust::device_vector` diff --git a/cpp/doxygen/Doxyfile b/cpp/doxygen/Doxyfile index 6a556bb4b34..3f98209852d 100644 --- a/cpp/doxygen/Doxyfile +++ b/cpp/doxygen/Doxyfile @@ -38,7 +38,7 @@ PROJECT_NAME = "libcudf" # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = 22.02.00 +PROJECT_NUMBER = 22.04.00 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a @@ -2168,7 +2168,7 @@ SKIP_FUNCTION_MACROS = YES # the path). If a tag file is not located in the directory in which doxygen is # run, you must also specify the path to the tagfile here. -TAGFILES = rmm.tag=https://docs.rapids.ai/api/librmm/22.02 +TAGFILES = rmm.tag=https://docs.rapids.ai/api/librmm/22.04 # When a file name is specified after GENERATE_TAGFILE, doxygen will create a # tag file that is based on the input files it reads. 
See section "Linking to diff --git a/cpp/examples/basic/CMakeLists.txt b/cpp/examples/basic/CMakeLists.txt index 9bb021f1429..40718c27988 100644 --- a/cpp/examples/basic/CMakeLists.txt +++ b/cpp/examples/basic/CMakeLists.txt @@ -14,7 +14,7 @@ file( ) include(${CMAKE_BINARY_DIR}/cmake/get_cpm.cmake) -set(CUDF_TAG branch-22.02) +set(CUDF_TAG branch-22.04) CPMFindPackage( NAME cudf GIT_REPOSITORY https://github.com/rapidsai/cudf GIT_TAG ${CUDF_TAG} diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index 374af536dc5..23587f49334 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -99,9 +99,9 @@ class aggregation { Kind kind; ///< The aggregation to perform virtual ~aggregation() = default; - virtual bool is_equal(aggregation const& other) const { return kind == other.kind; } - virtual size_t do_hash() const { return std::hash{}(kind); } - virtual std::unique_ptr clone() const = 0; + [[nodiscard]] virtual bool is_equal(aggregation const& other) const { return kind == other.kind; } + [[nodiscard]] virtual size_t do_hash() const { return std::hash{}(kind); } + [[nodiscard]] virtual std::unique_ptr clone() const = 0; // override functions for compound aggregations virtual std::vector> get_simple_aggregations( @@ -118,7 +118,7 @@ class aggregation { */ class rolling_aggregation : public virtual aggregation { public: - ~rolling_aggregation() = default; + ~rolling_aggregation() override = default; protected: rolling_aggregation() {} @@ -130,7 +130,7 @@ class rolling_aggregation : public virtual aggregation { */ class groupby_aggregation : public virtual aggregation { public: - ~groupby_aggregation() = default; + ~groupby_aggregation() override = default; protected: groupby_aggregation() {} @@ -141,7 +141,7 @@ class groupby_aggregation : public virtual aggregation { */ class groupby_scan_aggregation : public virtual aggregation { public: - ~groupby_scan_aggregation() = default; + 
~groupby_scan_aggregation() override = default; protected: groupby_scan_aggregation() {} diff --git a/cpp/include/cudf/ast/detail/expression_evaluator.cuh b/cpp/include/cudf/ast/detail/expression_evaluator.cuh index ecd46ec2c23..2bfe1b03dd3 100644 --- a/cpp/include/cudf/ast/detail/expression_evaluator.cuh +++ b/cpp/include/cudf/ast/detail/expression_evaluator.cuh @@ -76,7 +76,7 @@ struct expression_result { subclass().template set_value(index, result); } - __device__ inline bool is_valid() const { return subclass().is_valid(); } + [[nodiscard]] __device__ inline bool is_valid() const { return subclass().is_valid(); } __device__ inline T value() const { return subclass().value(); } }; @@ -110,7 +110,7 @@ struct value_expression_result /** * @brief Returns true if the underlying data is valid and false otherwise. */ - __device__ inline bool is_valid() const + [[nodiscard]] __device__ inline bool is_valid() const { if constexpr (has_nulls) { return _obj.has_value(); } return true; @@ -174,7 +174,7 @@ struct mutable_column_expression_result /** * @brief Not implemented for this specialization. */ - __device__ inline bool is_valid() const + [[nodiscard]] __device__ inline bool is_valid() const { // Not implemented since it would require modifying the API in the parent class to accept an // index. @@ -186,7 +186,7 @@ struct mutable_column_expression_result /** * @brief Not implemented for this specialization. */ - __device__ inline mutable_column_device_view value() const + [[nodiscard]] __device__ inline mutable_column_device_view value() const { // Not implemented since it would require modifying the API in the parent class to accept an // index. 
diff --git a/cpp/include/cudf/ast/detail/expression_parser.hpp b/cpp/include/cudf/ast/detail/expression_parser.hpp index 4f73cb1ef6e..0b54dc7e4f0 100644 --- a/cpp/include/cudf/ast/detail/expression_parser.hpp +++ b/cpp/include/cudf/ast/detail/expression_parser.hpp @@ -166,7 +166,7 @@ class expression_parser { * * @return cudf::data_type */ - cudf::data_type output_type() const; + [[nodiscard]] cudf::data_type output_type() const; /** * @brief Visit a literal expression. @@ -206,10 +206,10 @@ class expression_parser { */ class intermediate_counter { public: - intermediate_counter() : used_values(), max_used(0) {} + intermediate_counter() : used_values() {} cudf::size_type take(); void give(cudf::size_type value); - cudf::size_type get_max_used() const { return max_used; } + [[nodiscard]] cudf::size_type get_max_used() const { return max_used; } private: /** @@ -221,10 +221,10 @@ class expression_parser { * * @return cudf::size_type Smallest value not already in the container. */ - cudf::size_type find_first_missing() const; + [[nodiscard]] cudf::size_type find_first_missing() const; std::vector used_values; - cudf::size_type max_used; + cudf::size_type max_used{0}; }; expression_device_view device_expression_data; ///< The collection of data required to evaluate diff --git a/cpp/include/cudf/ast/expressions.hpp b/cpp/include/cudf/ast/expressions.hpp index 20aaa42fb68..eb98e0e0bee 100644 --- a/cpp/include/cudf/ast/expressions.hpp +++ b/cpp/include/cudf/ast/expressions.hpp @@ -38,14 +38,14 @@ class expression_parser; struct expression { virtual cudf::size_type accept(detail::expression_parser& visitor) const = 0; - bool may_evaluate_null(table_view const& left, rmm::cuda_stream_view stream) const + [[nodiscard]] bool may_evaluate_null(table_view const& left, rmm::cuda_stream_view stream) const { return may_evaluate_null(left, left, stream); } - virtual bool may_evaluate_null(table_view const& left, - table_view const& right, - rmm::cuda_stream_view stream) const = 0; 
+ [[nodiscard]] virtual bool may_evaluate_null(table_view const& left, + table_view const& right, + rmm::cuda_stream_view stream) const = 0; virtual ~expression() {} }; @@ -173,14 +173,17 @@ class literal : public expression { * * @return cudf::data_type */ - cudf::data_type get_data_type() const { return get_value().type(); } + [[nodiscard]] cudf::data_type get_data_type() const { return get_value().type(); } /** * @brief Get the value object. * * @return cudf::detail::fixed_width_scalar_device_view_base */ - cudf::detail::fixed_width_scalar_device_view_base get_value() const { return value; } + [[nodiscard]] cudf::detail::fixed_width_scalar_device_view_base get_value() const + { + return value; + } /** * @brief Accepts a visitor class. @@ -190,9 +193,9 @@ class literal : public expression { */ cudf::size_type accept(detail::expression_parser& visitor) const override; - bool may_evaluate_null(table_view const& left, - table_view const& right, - rmm::cuda_stream_view stream) const override + [[nodiscard]] bool may_evaluate_null(table_view const& left, + table_view const& right, + rmm::cuda_stream_view stream) const override { return !is_valid(stream); } @@ -202,7 +205,10 @@ class literal : public expression { * * @return bool */ - bool is_valid(rmm::cuda_stream_view stream) const { return scalar.is_valid(stream); } + [[nodiscard]] bool is_valid(rmm::cuda_stream_view stream) const + { + return scalar.is_valid(stream); + } private: cudf::scalar const& scalar; @@ -232,14 +238,14 @@ class column_reference : public expression { * * @return cudf::size_type */ - cudf::size_type get_column_index() const { return column_index; } + [[nodiscard]] cudf::size_type get_column_index() const { return column_index; } /** * @brief Get the table source. * * @return table_reference */ - table_reference get_table_source() const { return table_source; } + [[nodiscard]] table_reference get_table_source() const { return table_source; } /** * @brief Get the data type. 
@@ -247,7 +253,7 @@ class column_reference : public expression { * @param table Table used to determine types. * @return cudf::data_type */ - cudf::data_type get_data_type(table_view const& table) const + [[nodiscard]] cudf::data_type get_data_type(table_view const& table) const { return table.column(get_column_index()).type(); } @@ -259,7 +265,8 @@ class column_reference : public expression { * @param right_table Right table used to determine types. * @return cudf::data_type */ - cudf::data_type get_data_type(table_view const& left_table, table_view const& right_table) const + [[nodiscard]] cudf::data_type get_data_type(table_view const& left_table, + table_view const& right_table) const { auto const table = [&] { if (get_table_source() == table_reference::LEFT) { @@ -281,9 +288,9 @@ class column_reference : public expression { */ cudf::size_type accept(detail::expression_parser& visitor) const override; - bool may_evaluate_null(table_view const& left, - table_view const& right, - rmm::cuda_stream_view stream) const override + [[nodiscard]] bool may_evaluate_null(table_view const& left, + table_view const& right, + rmm::cuda_stream_view stream) const override { return (table_source == table_reference::LEFT ? left : right).column(column_index).has_nulls(); } @@ -327,7 +334,7 @@ class operation : public expression { * * @return ast_operator */ - ast_operator get_operator() const { return op; } + [[nodiscard]] ast_operator get_operator() const { return op; } /** * @brief Get the operands. 
@@ -344,9 +351,9 @@ class operation : public expression { */ cudf::size_type accept(detail::expression_parser& visitor) const override; - bool may_evaluate_null(table_view const& left, - table_view const& right, - rmm::cuda_stream_view stream) const override + [[nodiscard]] bool may_evaluate_null(table_view const& left, + table_view const& right, + rmm::cuda_stream_view stream) const override { return std::any_of(operands.cbegin(), operands.cend(), diff --git a/cpp/include/cudf/binaryop.hpp b/cpp/include/cudf/binaryop.hpp index a514010c1f0..daf55c0befe 100644 --- a/cpp/include/cudf/binaryop.hpp +++ b/cpp/include/cudf/binaryop.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -72,7 +72,11 @@ enum class binary_operator : int32_t { ///< operand when one is null; or invalid when both are null GENERIC_BINARY, ///< generic binary operator to be generated with input ///< ptx code - INVALID_BINARY ///< invalid operation + NULL_LOGICAL_AND, ///< operator && with Spark rules: (null, null) is null, (null, true) is null, + ///< (null, false) is false, and (valid, valid) == LOGICAL_AND(valid, valid) + NULL_LOGICAL_OR, ///< operator || with Spark rules: (null, null) is null, (null, true) is true, + ///< (null, false) is null, and (valid, valid) == LOGICAL_OR(valid, valid) + INVALID_BINARY ///< invalid operation }; /** * @brief Performs a binary operation between a scalar and a column. 
diff --git a/cpp/include/cudf/column/column.hpp b/cpp/include/cudf/column/column.hpp index 8decce7f260..7869f9bd2aa 100644 --- a/cpp/include/cudf/column/column.hpp +++ b/cpp/include/cudf/column/column.hpp @@ -122,12 +122,12 @@ class column { /** * @brief Returns the column's logical element type */ - data_type type() const noexcept { return _type; } + [[nodiscard]] data_type type() const noexcept { return _type; } /** * @brief Returns the number of elements */ - size_type size() const noexcept { return _size; } + [[nodiscard]] size_type size() const noexcept { return _size; } /** * @brief Returns the count of null elements. @@ -137,7 +137,7 @@ class column { * first invocation of `null_count()` will compute and store the count of null * elements indicated by the `null_mask` (if it exists). */ - size_type null_count() const; + [[nodiscard]] size_type null_count() const; /** * @brief Sets the column's null value indicator bitmask to `new_null_mask`. @@ -199,7 +199,7 @@ class column { * @return true The column can hold null values * @return false The column cannot hold null values */ - bool nullable() const noexcept { return (_null_mask.size() > 0); } + [[nodiscard]] bool nullable() const noexcept { return (_null_mask.size() > 0); } /** * @brief Indicates whether the column contains null elements. 
@@ -207,12 +207,12 @@ class column { * @return true One or more elements are null * @return false Zero elements are null */ - bool has_nulls() const noexcept { return (null_count() > 0); } + [[nodiscard]] bool has_nulls() const noexcept { return (null_count() > 0); } /** * @brief Returns the number of child columns */ - size_type num_children() const noexcept { return _children.size(); } + [[nodiscard]] size_type num_children() const noexcept { return _children.size(); } /** * @brief Returns a reference to the specified child @@ -228,7 +228,10 @@ class column { * @param child_index Index of the desired child * @return column const& Const reference to the desired child */ - column const& child(size_type child_index) const noexcept { return *_children[child_index]; }; + [[nodiscard]] column const& child(size_type child_index) const noexcept + { + return *_children[child_index]; + }; /** * @brief Wrapper for the contents of a column. @@ -264,7 +267,7 @@ class column { * * @return column_view The immutable, non-owning view */ - column_view view() const; + [[nodiscard]] column_view view() const; /** * @brief Implicit conversion operator to a `column_view`. diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index b29df1852b2..d2332ef9026 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -139,12 +139,12 @@ class alignas(16) column_device_view_base { /** * @brief Returns the number of elements in the column. 
*/ - __host__ __device__ size_type size() const noexcept { return _size; } + [[nodiscard]] __host__ __device__ size_type size() const noexcept { return _size; } /** * @brief Returns the element type */ - __host__ __device__ data_type type() const noexcept { return _type; } + [[nodiscard]] __host__ __device__ data_type type() const noexcept { return _type; } /** * @brief Indicates whether the column can contain null elements, i.e., if it @@ -155,7 +155,7 @@ class alignas(16) column_device_view_base { * @return true The bitmask is allocated * @return false The bitmask is not allocated */ - __host__ __device__ bool nullable() const noexcept { return nullptr != _null_mask; } + [[nodiscard]] __host__ __device__ bool nullable() const noexcept { return nullptr != _null_mask; } /** * @brief Returns raw pointer to the underlying bitmask allocation. @@ -164,13 +164,16 @@ class alignas(16) column_device_view_base { * * @note If `null_count() == 0`, this may return `nullptr`. */ - __host__ __device__ bitmask_type const* null_mask() const noexcept { return _null_mask; } + [[nodiscard]] __host__ __device__ bitmask_type const* null_mask() const noexcept + { + return _null_mask; + } /** * @brief Returns the index of the first element relative to the base memory * allocation, i.e., what is returned from `head()`. 
*/ - __host__ __device__ size_type offset() const noexcept { return _offset; } + [[nodiscard]] __host__ __device__ size_type offset() const noexcept { return _offset; } /** * @brief Returns whether the specified element holds a valid value (i.e., not @@ -186,7 +189,7 @@ class alignas(16) column_device_view_base { * @return true The element is valid * @return false The element is null */ - __device__ bool is_valid(size_type element_index) const noexcept + [[nodiscard]] __device__ bool is_valid(size_type element_index) const noexcept { return not nullable() or is_valid_nocheck(element_index); } @@ -203,7 +206,7 @@ class alignas(16) column_device_view_base { * @return true The element is valid * @return false The element is null */ - __device__ bool is_valid_nocheck(size_type element_index) const noexcept + [[nodiscard]] __device__ bool is_valid_nocheck(size_type element_index) const noexcept { return bit_is_set(_null_mask, offset() + element_index); } @@ -221,7 +224,7 @@ class alignas(16) column_device_view_base { * @return true The element is null * @return false The element is valid */ - __device__ bool is_null(size_type element_index) const noexcept + [[nodiscard]] __device__ bool is_null(size_type element_index) const noexcept { return not is_valid(element_index); } @@ -237,7 +240,7 @@ class alignas(16) column_device_view_base { * @return true The element is null * @return false The element is valid */ - __device__ bool is_null_nocheck(size_type element_index) const noexcept + [[nodiscard]] __device__ bool is_null_nocheck(size_type element_index) const noexcept { return not is_valid_nocheck(element_index); } @@ -251,7 +254,7 @@ class alignas(16) column_device_view_base { * @param word_index The index of the word to get * @return bitmask word for the given word_index */ - __device__ bitmask_type get_mask_word(size_type word_index) const noexcept + [[nodiscard]] __device__ bitmask_type get_mask_word(size_type word_index) const noexcept { return 
null_mask()[word_index]; } @@ -476,7 +479,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base { * For columns with null elements, use `make_null_replacement_iterator`. */ template ())> - const_iterator begin() const + [[nodiscard]] const_iterator begin() const { return const_iterator{count_it{0}, detail::value_accessor{*this}}; } @@ -494,7 +497,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base { * For columns with null elements, use `make_null_replacement_iterator`. */ template ())> - const_iterator end() const + [[nodiscard]] const_iterator end() const { return const_iterator{count_it{size()}, detail::value_accessor{*this}}; } @@ -602,7 +605,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base { template ())> - const_pair_iterator pair_begin() const + [[nodiscard]] const_pair_iterator pair_begin() const { return const_pair_iterator{count_it{0}, detail::pair_accessor{*this}}; @@ -632,7 +635,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base { template ())> - const_pair_rep_iterator pair_rep_begin() const + [[nodiscard]] const_pair_rep_iterator pair_rep_begin() const { return const_pair_rep_iterator{count_it{0}, detail::pair_rep_accessor{*this}}; @@ -673,7 +676,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base { template ())> - const_pair_iterator pair_end() const + [[nodiscard]] const_pair_iterator pair_end() const { return const_pair_iterator{count_it{size()}, detail::pair_accessor{*this}}; @@ -693,7 +696,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base { template ())> - const_pair_rep_iterator pair_rep_end() const + [[nodiscard]] const_pair_rep_iterator pair_rep_end() const { return const_pair_rep_iterator{count_it{size()}, detail::pair_rep_accessor{*this}}; @@ -743,7 +746,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base { * @param 
child_index The index of the desired child * @return column_view The requested child `column_view` */ - __device__ column_device_view child(size_type child_index) const noexcept + [[nodiscard]] __device__ column_device_view child(size_type child_index) const noexcept { return d_children[child_index]; } @@ -751,7 +754,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base { /** * @brief Returns a span containing the children of this column */ - __device__ device_span children() const noexcept + [[nodiscard]] __device__ device_span children() const noexcept { return device_span(d_children, _num_children); } @@ -761,7 +764,10 @@ class alignas(16) column_device_view : public detail::column_device_view_base { * * @return The number of child columns */ - __host__ __device__ size_type num_child_columns() const noexcept { return _num_children; } + [[nodiscard]] __host__ __device__ size_type num_child_columns() const noexcept + { + return _num_children; + } protected: column_device_view* d_children{}; ///< Array of `column_device_view` @@ -907,7 +913,7 @@ class alignas(16) mutable_column_device_view : public detail::column_device_view * * @note If `null_count() == 0`, this may return `nullptr`. 
*/ - __host__ __device__ bitmask_type* null_mask() const noexcept + [[nodiscard]] __host__ __device__ bitmask_type* null_mask() const noexcept { return const_cast(detail::column_device_view_base::null_mask()); } @@ -957,7 +963,7 @@ class alignas(16) mutable_column_device_view : public detail::column_device_view * @param child_index The index of the desired child * @return column_view The requested child `column_view` */ - __device__ mutable_column_device_view child(size_type child_index) const noexcept + [[nodiscard]] __device__ mutable_column_device_view child(size_type child_index) const noexcept { return d_children[child_index]; } diff --git a/cpp/include/cudf/column/column_view.hpp b/cpp/include/cudf/column/column_view.hpp index 3f335509da8..325f023f283 100644 --- a/cpp/include/cudf/column/column_view.hpp +++ b/cpp/include/cudf/column/column_view.hpp @@ -123,17 +123,17 @@ class column_view_base { /** * @brief Returns the number of elements in the column */ - size_type size() const noexcept { return _size; } + [[nodiscard]] size_type size() const noexcept { return _size; } /** * @brief Returns true if `size()` returns zero, or false otherwise */ - size_type is_empty() const noexcept { return size() == 0; } + [[nodiscard]] size_type is_empty() const noexcept { return size() == 0; } /** * @brief Returns the element `data_type` */ - data_type type() const noexcept { return _type; } + [[nodiscard]] data_type type() const noexcept { return _type; } /** * @brief Indicates if the column can contain null elements, i.e., if it has @@ -144,7 +144,7 @@ class column_view_base { * @return true The bitmask is allocated * @return false The bitmask is not allocated */ - bool nullable() const noexcept { return nullptr != _null_mask; } + [[nodiscard]] bool nullable() const noexcept { return nullptr != _null_mask; } /** * @brief Returns the count of null elements @@ -154,7 +154,7 @@ class column_view_base { * first invocation of `null_count()` will compute and store the count of 
null * elements indicated by the `null_mask` (if it exists). */ - size_type null_count() const; + [[nodiscard]] size_type null_count() const; /** * @brief Returns the count of null elements in the range [begin, end) @@ -169,7 +169,7 @@ class column_view_base { * @param[in] begin The starting index of the range (inclusive). * @param[in] end The index of the last element in the range (exclusive). */ - size_type null_count(size_type begin, size_type end) const; + [[nodiscard]] size_type null_count(size_type begin, size_type end) const; /** * @brief Indicates if the column contains null elements, @@ -178,7 +178,7 @@ class column_view_base { * @return true One or more elements are null * @return false All elements are valid */ - bool has_nulls() const { return null_count() > 0; } + [[nodiscard]] bool has_nulls() const { return null_count() > 0; } /** * @brief Indicates if the column contains null elements in the range @@ -192,7 +192,10 @@ class column_view_base { * @return true One or more elements are null in the range [begin, end) * @return false All elements are valid in the range [begin, end) */ - bool has_nulls(size_type begin, size_type end) const { return null_count(begin, end) > 0; } + [[nodiscard]] bool has_nulls(size_type begin, size_type end) const + { + return null_count(begin, end) > 0; + } /** * @brief Returns raw pointer to the underlying bitmask allocation. @@ -201,13 +204,13 @@ class column_view_base { * * @note If `null_count() == 0`, this may return `nullptr`. */ - bitmask_type const* null_mask() const noexcept { return _null_mask; } + [[nodiscard]] bitmask_type const* null_mask() const noexcept { return _null_mask; } /** * @brief Returns the index of the first element relative to the base memory * allocation, i.e., what is returned from `head()`. 
*/ - size_type offset() const noexcept { return _offset; } + [[nodiscard]] size_type offset() const noexcept { return _offset; } protected: data_type _type{type_id::EMPTY}; ///< Element type @@ -352,12 +355,15 @@ class column_view : public detail::column_view_base { * @param child_index The index of the desired child * @return column_view The requested child `column_view` */ - column_view child(size_type child_index) const noexcept { return _children[child_index]; } + [[nodiscard]] column_view child(size_type child_index) const noexcept + { + return _children[child_index]; + } /** * @brief Returns the number of child columns. */ - size_type num_children() const noexcept { return _children.size(); } + [[nodiscard]] size_type num_children() const noexcept { return _children.size(); } /** * @brief Returns iterator to the beginning of the ordered sequence of child column-views. @@ -524,7 +530,7 @@ class mutable_column_view : public detail::column_view_base { * * @note If `null_count() == 0`, this may return `nullptr`. */ - bitmask_type* null_mask() const noexcept + [[nodiscard]] bitmask_type* null_mask() const noexcept { return const_cast(detail::column_view_base::null_mask()); } @@ -544,7 +550,7 @@ class mutable_column_view : public detail::column_view_base { * @param child_index The index of the desired child * @return mutable_column_view The requested child `mutable_column_view` */ - mutable_column_view child(size_type child_index) const noexcept + [[nodiscard]] mutable_column_view child(size_type child_index) const noexcept { return mutable_children[child_index]; } @@ -552,7 +558,7 @@ class mutable_column_view : public detail::column_view_base { /** * @brief Returns the number of child columns. */ - size_type num_children() const noexcept { return mutable_children.size(); } + [[nodiscard]] size_type num_children() const noexcept { return mutable_children.size(); } /** * @brief Returns iterator to the beginning of the ordered sequence of child column-views. 
diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp index 81dddbd284a..850a11426af 100644 --- a/cpp/include/cudf/copying.hpp +++ b/cpp/include/cudf/copying.hpp @@ -553,8 +553,8 @@ struct packed_columns { struct metadata { metadata() = default; metadata(std::vector&& v) : data_(std::move(v)) {} - uint8_t const* data() const { return data_.data(); } - size_t size() const { return data_.size(); } + [[nodiscard]] uint8_t const* data() const { return data_.data(); } + [[nodiscard]] size_t size() const { return data_.size(); } private: std::vector data_; diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index 3674efbcc7b..fbf315776f4 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -148,7 +148,7 @@ class sum_aggregation final : public rolling_aggregation, public: sum_aggregation() : aggregation(SUM) {} - std::unique_ptr clone() const override + [[nodiscard]] std::unique_ptr clone() const override { return std::make_unique(*this); } @@ -167,7 +167,7 @@ class product_aggregation final : public groupby_aggregation { public: product_aggregation() : aggregation(PRODUCT) {} - std::unique_ptr clone() const override + [[nodiscard]] std::unique_ptr clone() const override { return std::make_unique(*this); } @@ -188,7 +188,7 @@ class min_aggregation final : public rolling_aggregation, public: min_aggregation() : aggregation(MIN) {} - std::unique_ptr clone() const override + [[nodiscard]] std::unique_ptr clone() const override { return std::make_unique(*this); } @@ -209,7 +209,7 @@ class max_aggregation final : public rolling_aggregation, public: max_aggregation() : aggregation(MAX) {} - std::unique_ptr clone() const override + [[nodiscard]] std::unique_ptr clone() const override { return std::make_unique(*this); } @@ -230,7 +230,7 @@ class count_aggregation final : public rolling_aggregation, public: 
count_aggregation(aggregation::Kind kind) : aggregation(kind) {} - std::unique_ptr clone() const override + [[nodiscard]] std::unique_ptr clone() const override { return std::make_unique(*this); } @@ -249,7 +249,7 @@ class any_aggregation final : public aggregation { public: any_aggregation() : aggregation(ANY) {} - std::unique_ptr clone() const override + [[nodiscard]] std::unique_ptr clone() const override { return std::make_unique(*this); } @@ -268,7 +268,7 @@ class all_aggregation final : public aggregation { public: all_aggregation() : aggregation(ALL) {} - std::unique_ptr clone() const override + [[nodiscard]] std::unique_ptr clone() const override { return std::make_unique(*this); } @@ -287,7 +287,7 @@ class sum_of_squares_aggregation final : public groupby_aggregation { public: sum_of_squares_aggregation() : aggregation(SUM_OF_SQUARES) {} - std::unique_ptr clone() const override + [[nodiscard]] std::unique_ptr clone() const override { return std::make_unique(*this); } @@ -306,7 +306,7 @@ class mean_aggregation final : public rolling_aggregation, public groupby_aggreg public: mean_aggregation() : aggregation(MEAN) {} - std::unique_ptr clone() const override + [[nodiscard]] std::unique_ptr clone() const override { return std::make_unique(*this); } @@ -325,7 +325,7 @@ class m2_aggregation : public groupby_aggregation { public: m2_aggregation() : aggregation{M2} {} - std::unique_ptr clone() const override + [[nodiscard]] std::unique_ptr clone() const override { return std::make_unique(*this); } @@ -344,14 +344,17 @@ class std_var_aggregation : public rolling_aggregation, public groupby_aggregati public: size_type _ddof; ///< Delta degrees of freedom - bool is_equal(aggregation const& _other) const override + [[nodiscard]] bool is_equal(aggregation const& _other) const override { if (!this->aggregation::is_equal(_other)) { return false; } auto const& other = dynamic_cast(_other); return _ddof == other._ddof; } - size_t do_hash() const override { return 
this->aggregation::do_hash() ^ hash_impl(); } + [[nodiscard]] size_t do_hash() const override + { + return this->aggregation::do_hash() ^ hash_impl(); + } protected: std_var_aggregation(aggregation::Kind k, size_type ddof) : rolling_aggregation(k), _ddof{ddof} @@ -359,7 +362,7 @@ class std_var_aggregation : public rolling_aggregation, public groupby_aggregati CUDF_EXPECTS(k == aggregation::STD or k == aggregation::VARIANCE, "std_var_aggregation can accept only STD, VARIANCE"); } - size_type hash_impl() const { return std::hash{}(_ddof); } + [[nodiscard]] size_type hash_impl() const { return std::hash{}(_ddof); } }; /** @@ -372,7 +375,7 @@ class var_aggregation final : public std_var_aggregation { { } - std::unique_ptr clone() const override + [[nodiscard]] std::unique_ptr clone() const override { return std::make_unique(*this); } @@ -394,7 +397,7 @@ class std_aggregation final : public std_var_aggregation { { } - std::unique_ptr clone() const override + [[nodiscard]] std::unique_ptr clone() const override { return std::make_unique(*this); } @@ -413,7 +416,7 @@ class median_aggregation final : public groupby_aggregation { public: median_aggregation() : aggregation(MEDIAN) {} - std::unique_ptr clone() const override + [[nodiscard]] std::unique_ptr clone() const override { return std::make_unique(*this); } @@ -437,7 +440,7 @@ class quantile_aggregation final : public groupby_aggregation { std::vector _quantiles; ///< Desired quantile(s) interpolation _interpolation; ///< Desired interpolation - bool is_equal(aggregation const& _other) const override + [[nodiscard]] bool is_equal(aggregation const& _other) const override { if (!this->aggregation::is_equal(_other)) { return false; } @@ -447,9 +450,12 @@ class quantile_aggregation final : public groupby_aggregation { std::equal(_quantiles.begin(), _quantiles.end(), other._quantiles.begin()); } - size_t do_hash() const override { return this->aggregation::do_hash() ^ hash_impl(); } + [[nodiscard]] size_t do_hash() const 
override + { + return this->aggregation::do_hash() ^ hash_impl(); + } - std::unique_ptr clone() const override + [[nodiscard]] std::unique_ptr clone() const override { return std::make_unique(*this); } @@ -478,7 +484,7 @@ class argmax_aggregation final : public rolling_aggregation, public groupby_aggr public: argmax_aggregation() : aggregation(ARGMAX) {} - std::unique_ptr clone() const override + [[nodiscard]] std::unique_ptr clone() const override { return std::make_unique(*this); } @@ -497,7 +503,7 @@ class argmin_aggregation final : public rolling_aggregation, public groupby_aggr public: argmin_aggregation() : aggregation(ARGMIN) {} - std::unique_ptr clone() const override + [[nodiscard]] std::unique_ptr clone() const override { return std::make_unique(*this); } @@ -521,16 +527,19 @@ class nunique_aggregation final : public groupby_aggregation { null_policy _null_handling; ///< include or exclude nulls - bool is_equal(aggregation const& _other) const override + [[nodiscard]] bool is_equal(aggregation const& _other) const override { if (!this->aggregation::is_equal(_other)) { return false; } auto const& other = dynamic_cast(_other); return _null_handling == other._null_handling; } - size_t do_hash() const override { return this->aggregation::do_hash() ^ hash_impl(); } + [[nodiscard]] size_t do_hash() const override + { + return this->aggregation::do_hash() ^ hash_impl(); + } - std::unique_ptr clone() const override + [[nodiscard]] std::unique_ptr clone() const override { return std::make_unique(*this); } @@ -558,16 +567,19 @@ class nth_element_aggregation final : public groupby_aggregation { size_type _n; ///< nth index to return null_policy _null_handling; ///< include or exclude nulls - bool is_equal(aggregation const& _other) const override + [[nodiscard]] bool is_equal(aggregation const& _other) const override { if (!this->aggregation::is_equal(_other)) { return false; } auto const& other = dynamic_cast(_other); return _n == other._n and _null_handling == 
other._null_handling; } - size_t do_hash() const override { return this->aggregation::do_hash() ^ hash_impl(); } + [[nodiscard]] size_t do_hash() const override + { + return this->aggregation::do_hash() ^ hash_impl(); + } - std::unique_ptr clone() const override + [[nodiscard]] std::unique_ptr clone() const override { return std::make_unique(*this); } @@ -592,7 +604,7 @@ class row_number_aggregation final : public rolling_aggregation { public: row_number_aggregation() : aggregation(ROW_NUMBER) {} - std::unique_ptr clone() const override + [[nodiscard]] std::unique_ptr clone() const override { return std::make_unique(*this); } @@ -611,7 +623,7 @@ class rank_aggregation final : public rolling_aggregation, public groupby_scan_a public: rank_aggregation() : aggregation{RANK} {} - std::unique_ptr clone() const override + [[nodiscard]] std::unique_ptr clone() const override { return std::make_unique(*this); } @@ -630,7 +642,7 @@ class dense_rank_aggregation final : public rolling_aggregation, public groupby_ public: dense_rank_aggregation() : aggregation{DENSE_RANK} {} - std::unique_ptr clone() const override + [[nodiscard]] std::unique_ptr clone() const override { return std::make_unique(*this); } @@ -654,16 +666,19 @@ class collect_list_aggregation final : public rolling_aggregation, public groupb null_policy _null_handling; ///< include or exclude nulls - bool is_equal(aggregation const& _other) const override + [[nodiscard]] bool is_equal(aggregation const& _other) const override { if (!this->aggregation::is_equal(_other)) { return false; } auto const& other = dynamic_cast(_other); return (_null_handling == other._null_handling); } - size_t do_hash() const override { return this->aggregation::do_hash() ^ hash_impl(); } + [[nodiscard]] size_t do_hash() const override + { + return this->aggregation::do_hash() ^ hash_impl(); + } - std::unique_ptr clone() const override + [[nodiscard]] std::unique_ptr clone() const override { return std::make_unique(*this); } @@ -698,7 
+713,7 @@ class collect_set_aggregation final : public rolling_aggregation, public groupby nan_equality _nans_equal; ///< whether to consider NaNs as equal value (applicable only to ///< floating point types) - bool is_equal(aggregation const& _other) const override + [[nodiscard]] bool is_equal(aggregation const& _other) const override { if (!this->aggregation::is_equal(_other)) { return false; } auto const& other = dynamic_cast(_other); @@ -706,9 +721,12 @@ class collect_set_aggregation final : public rolling_aggregation, public groupby _nans_equal == other._nans_equal); } - size_t do_hash() const override { return this->aggregation::do_hash() ^ hash_impl(); } + [[nodiscard]] size_t do_hash() const override + { + return this->aggregation::do_hash() ^ hash_impl(); + } - std::unique_ptr clone() const override + [[nodiscard]] std::unique_ptr clone() const override { return std::make_unique(*this); } @@ -737,16 +755,19 @@ class lead_lag_aggregation final : public rolling_aggregation { { } - bool is_equal(aggregation const& _other) const override + [[nodiscard]] bool is_equal(aggregation const& _other) const override { if (!this->aggregation::is_equal(_other)) { return false; } auto const& other = dynamic_cast(_other); return (row_offset == other.row_offset); } - size_t do_hash() const override { return this->aggregation::do_hash() ^ hash_impl(); } + [[nodiscard]] size_t do_hash() const override + { + return this->aggregation::do_hash() ^ hash_impl(); + } - std::unique_ptr clone() const override + [[nodiscard]] std::unique_ptr clone() const override { return std::make_unique(*this); } @@ -760,7 +781,7 @@ class lead_lag_aggregation final : public rolling_aggregation { size_type row_offset; private: - size_t hash_impl() const { return std::hash()(row_offset); } + [[nodiscard]] size_t hash_impl() const { return std::hash()(row_offset); } }; /** @@ -782,7 +803,7 @@ class udf_aggregation final : public rolling_aggregation { "udf_aggregation can accept only PTX, CUDA"); } - 
bool is_equal(aggregation const& _other) const override + [[nodiscard]] bool is_equal(aggregation const& _other) const override { if (!this->aggregation::is_equal(_other)) { return false; } auto const& other = dynamic_cast(_other); @@ -790,9 +811,12 @@ class udf_aggregation final : public rolling_aggregation { _function_name == other._function_name and _output_type == other._output_type); } - size_t do_hash() const override { return this->aggregation::do_hash() ^ hash_impl(); } + [[nodiscard]] size_t do_hash() const override + { + return this->aggregation::do_hash() ^ hash_impl(); + } - std::unique_ptr clone() const override + [[nodiscard]] std::unique_ptr clone() const override { return std::make_unique(*this); } @@ -809,7 +833,7 @@ class udf_aggregation final : public rolling_aggregation { data_type _output_type; protected: - size_t hash_impl() const + [[nodiscard]] size_t hash_impl() const { return std::hash{}(_source) ^ std::hash{}(_operator_name) ^ std::hash{}(_function_name) ^ @@ -824,7 +848,7 @@ class merge_lists_aggregation final : public groupby_aggregation { public: explicit merge_lists_aggregation() : aggregation{MERGE_LISTS} {} - std::unique_ptr clone() const override + [[nodiscard]] std::unique_ptr clone() const override { return std::make_unique(*this); } @@ -850,16 +874,19 @@ class merge_sets_aggregation final : public groupby_aggregation { nan_equality _nans_equal; ///< whether to consider NaNs as equal value (applicable only to ///< floating point types) - bool is_equal(aggregation const& _other) const override + [[nodiscard]] bool is_equal(aggregation const& _other) const override { if (!this->aggregation::is_equal(_other)) { return false; } auto const& other = dynamic_cast(_other); return (_nulls_equal == other._nulls_equal && _nans_equal == other._nans_equal); } - size_t do_hash() const override { return this->aggregation::do_hash() ^ hash_impl(); } + [[nodiscard]] size_t do_hash() const override + { + return this->aggregation::do_hash() ^ 
hash_impl(); + } - std::unique_ptr clone() const override + [[nodiscard]] std::unique_ptr clone() const override { return std::make_unique(*this); } @@ -884,7 +911,7 @@ class merge_m2_aggregation final : public groupby_aggregation { public: explicit merge_m2_aggregation() : aggregation{MERGE_M2} {} - std::unique_ptr clone() const override + [[nodiscard]] std::unique_ptr clone() const override { return std::make_unique(*this); } @@ -908,9 +935,12 @@ class covariance_aggregation final : public groupby_aggregation { size_type _min_periods; size_type _ddof; - size_t do_hash() const override { return this->aggregation::do_hash() ^ hash_impl(); } + [[nodiscard]] size_t do_hash() const override + { + return this->aggregation::do_hash() ^ hash_impl(); + } - std::unique_ptr clone() const override + [[nodiscard]] std::unique_ptr clone() const override { return std::make_unique(*this); } @@ -940,16 +970,19 @@ class correlation_aggregation final : public groupby_aggregation { correlation_type _type; size_type _min_periods; - bool is_equal(aggregation const& _other) const override + [[nodiscard]] bool is_equal(aggregation const& _other) const override { if (!this->aggregation::is_equal(_other)) { return false; } auto const& other = dynamic_cast(_other); return (_type == other._type); } - size_t do_hash() const override { return this->aggregation::do_hash() ^ hash_impl(); } + [[nodiscard]] size_t do_hash() const override + { + return this->aggregation::do_hash() ^ hash_impl(); + } - std::unique_ptr clone() const override + [[nodiscard]] std::unique_ptr clone() const override { return std::make_unique(*this); } @@ -979,7 +1012,7 @@ class tdigest_aggregation final : public groupby_aggregation { int const max_centroids; - std::unique_ptr clone() const override + [[nodiscard]] std::unique_ptr clone() const override { return std::make_unique(*this); } @@ -1003,7 +1036,7 @@ class merge_tdigest_aggregation final : public groupby_aggregation { int const max_centroids; - std::unique_ptr 
clone() const override + [[nodiscard]] std::unique_ptr clone() const override { return std::make_unique(*this); } diff --git a/cpp/include/cudf/detail/aggregation/result_cache.hpp b/cpp/include/cudf/detail/aggregation/result_cache.hpp index 41f5c19f06a..4409d7e0d73 100644 --- a/cpp/include/cudf/detail/aggregation/result_cache.hpp +++ b/cpp/include/cudf/detail/aggregation/result_cache.hpp @@ -49,11 +49,11 @@ class result_cache { result_cache(size_t num_columns) : _cache(num_columns) {} - bool has_result(column_view const& input, aggregation const& agg) const; + [[nodiscard]] bool has_result(column_view const& input, aggregation const& agg) const; void add_result(column_view const& input, aggregation const& agg, std::unique_ptr&& col); - column_view get_result(column_view const& input, aggregation const& agg) const; + [[nodiscard]] column_view get_result(column_view const& input, aggregation const& agg) const; std::unique_ptr release_result(column_view const& input, aggregation const& agg); diff --git a/cpp/include/cudf/detail/merge.cuh b/cpp/include/cudf/detail/merge.cuh index ee5cb5c265d..1debef17db7 100644 --- a/cpp/include/cudf/detail/merge.cuh +++ b/cpp/include/cudf/detail/merge.cuh @@ -77,8 +77,8 @@ struct tagged_element_relational_comparator { { } - __device__ weak_ordering compare(index_type lhs_tagged_index, - index_type rhs_tagged_index) const noexcept + [[nodiscard]] __device__ weak_ordering compare(index_type lhs_tagged_index, + index_type rhs_tagged_index) const noexcept { auto const [l_side, l_indx] = lhs_tagged_index; auto const [r_side, r_indx] = rhs_tagged_index; diff --git a/cpp/include/cudf/detail/structs/utilities.hpp b/cpp/include/cudf/detail/structs/utilities.hpp index 6f32e3190bf..751b7c00e8a 100644 --- a/cpp/include/cudf/detail/structs/utilities.hpp +++ b/cpp/include/cudf/detail/structs/utilities.hpp @@ -106,17 +106,17 @@ class flattened_table { /** * @brief Getter for the flattened columns, as a `table_view`. 
*/ - table_view flattened_columns() const { return _flattened_columns; } + [[nodiscard]] table_view flattened_columns() const { return _flattened_columns; } /** * @brief Getter for the cudf::order of the table_view's columns. */ - std::vector orders() const { return _orders; } + [[nodiscard]] std::vector orders() const { return _orders; } /** * @brief Getter for the cudf::null_order of the table_view's columns. */ - std::vector null_orders() const { return _null_orders; } + [[nodiscard]] std::vector null_orders() const { return _null_orders; } /** * @brief Conversion to `table_view`, to fetch flattened columns. diff --git a/cpp/include/cudf/detail/utilities/device_atomics.cuh b/cpp/include/cudf/detail/utilities/device_atomics.cuh index 6380e76fdfa..b8ea228383d 100644 --- a/cpp/include/cudf/detail/utilities/device_atomics.cuh +++ b/cpp/include/cudf/detail/utilities/device_atomics.cuh @@ -62,8 +62,8 @@ struct genericAtomicOperationImpl { { using T_int = unsigned int; - T_int* address_uint32 = reinterpret_cast(addr - (reinterpret_cast(addr) & 3)); - T_int shift = ((reinterpret_cast(addr) & 3) * 8); + auto* address_uint32 = reinterpret_cast(addr - (reinterpret_cast(addr) & 3)); + T_int shift = ((reinterpret_cast(addr) & 3) * 8); T_int old = *address_uint32; T_int assumed; @@ -87,7 +87,7 @@ struct genericAtomicOperationImpl { { using T_int = unsigned int; bool is_32_align = (reinterpret_cast(addr) & 2) ? false : true; - T_int* address_uint32 = + auto* address_uint32 = reinterpret_cast(reinterpret_cast(addr) - (is_32_align ? 
0 : 2)); T_int old = *address_uint32; @@ -322,8 +322,8 @@ struct typesAtomicCASImpl { { using T_int = unsigned int; - T_int shift = ((reinterpret_cast(addr) & 3) * 8); - T_int* address_uint32 = reinterpret_cast(addr - (reinterpret_cast(addr) & 3)); + T_int shift = ((reinterpret_cast(addr) & 3) * 8); + auto* address_uint32 = reinterpret_cast(addr - (reinterpret_cast(addr) & 3)); // the 'target_value' in `old` can be different from `compare` // because other thread may update the value @@ -355,7 +355,7 @@ struct typesAtomicCASImpl { using T_int = unsigned int; bool is_32_align = (reinterpret_cast(addr) & 2) ? false : true; - T_int* address_uint32 = + auto* address_uint32 = reinterpret_cast(reinterpret_cast(addr) - (is_32_align ? 0 : 2)); T_int old = *address_uint32; diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh index c35d24ddeac..b5ca5a3590e 100644 --- a/cpp/include/cudf/detail/utilities/hash_functions.cuh +++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh @@ -16,12 +16,16 @@ #pragma once +#include + #include #include #include #include #include +#include + using hash_value_type = uint32_t; namespace cudf { @@ -86,12 +90,12 @@ struct MurmurHash3_32 { MurmurHash3_32() = default; constexpr MurmurHash3_32(uint32_t seed) : m_seed(seed) {} - __device__ inline uint32_t rotl32(uint32_t x, int8_t r) const + [[nodiscard]] __device__ inline uint32_t rotl32(uint32_t x, int8_t r) const { return (x << r) | (x >> (32 - r)); } - __device__ inline uint32_t fmix32(uint32_t h) const + [[nodiscard]] __device__ inline uint32_t fmix32(uint32_t h) const { h ^= h >> 16; h *= 0x85ebca6b; @@ -337,17 +341,21 @@ struct SparkMurmurHash3_32 { template result_type __device__ inline compute(TKey const& key) const { - constexpr int len = sizeof(TKey); - int8_t const* const data = reinterpret_cast(&key); - constexpr int nblocks = len / 4; + return compute_bytes(reinterpret_cast(&key), sizeof(TKey)); + } + + result_type 
__device__ compute_bytes(std::byte const* const data, cudf::size_type const len) const + { + constexpr cudf::size_type block_size = sizeof(uint32_t) / sizeof(std::byte); + cudf::size_type const nblocks = len / block_size; + uint32_t h1 = m_seed; + constexpr uint32_t c1 = 0xcc9e2d51; + constexpr uint32_t c2 = 0x1b873593; - uint32_t h1 = m_seed; - constexpr uint32_t c1 = 0xcc9e2d51; - constexpr uint32_t c2 = 0x1b873593; //---------- - // body - uint32_t const* const blocks = reinterpret_cast(data + nblocks * 4); - for (int i = -nblocks; i; i++) { + // Process all four-byte chunks + uint32_t const* const blocks = reinterpret_cast(data); + for (cudf::size_type i = 0; i < nblocks; i++) { uint32_t k1 = blocks[i]; k1 *= c1; k1 = rotl32(k1, 15); @@ -357,9 +365,14 @@ struct SparkMurmurHash3_32 { h1 = h1 * 5 + 0xe6546b64; } //---------- - // byte by byte tail processing - for (int i = nblocks * 4; i < len; i++) { - int32_t k1 = data[i]; + // Process remaining bytes that do not fill a four-byte chunk using Spark's approach + // (does not conform to normal MurmurHash3) + for (cudf::size_type i = nblocks * 4; i < len; i++) { + // We require a two-step cast to get the k1 value from the byte. First, + // we must cast to a signed int8_t. Then, the sign bit is preserved when + // casting to uint32_t under 2's complement. Java preserves the + // signedness when casting byte-to-int, but C++ does not. 
+ uint32_t k1 = static_cast(std::to_integer(data[i])); k1 *= c1; k1 = rotl32(k1, 15); k1 *= c2; @@ -427,7 +440,42 @@ template <> hash_value_type __device__ inline SparkMurmurHash3_32::operator()( numeric::decimal128 const& key) const { - return this->compute<__int128_t>(key.value()); + // Generates the Spark MurmurHash3 hash value, mimicking the conversion: + // java.math.BigDecimal.valueOf(unscaled_value, _scale).unscaledValue().toByteArray() + // https://github.com/apache/spark/blob/master/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala#L381 + __int128_t const val = key.value(); + constexpr cudf::size_type key_size = sizeof(__int128_t); + std::byte const* data = reinterpret_cast(&val); + + // Small negative values start with 0xff..., small positive values start with 0x00... + bool const is_negative = val < 0; + std::byte const zero_value = is_negative ? std::byte{0xff} : std::byte{0x00}; + + // If the value can be represented with a shorter than 16-byte integer, the + // leading bytes of the little-endian value are truncated and are not hashed. + auto const reverse_begin = thrust::reverse_iterator(data + key_size); + auto const reverse_end = thrust::reverse_iterator(data); + auto const first_nonzero_byte = + thrust::find_if_not(thrust::seq, reverse_begin, reverse_end, [zero_value](std::byte const& v) { + return v == zero_value; + }).base(); + // Max handles special case of 0 and -1 which would shorten to 0 length otherwise + cudf::size_type length = + std::max(1, static_cast(thrust::distance(data, first_nonzero_byte))); + + // Preserve the 2's complement sign bit by adding a byte back on if necessary. + // e.g. 0x0000ff would shorten to 0x00ff. The 0x00 byte is retained to + // preserve the sign bit, rather than leaving an "f" at the front which would + // change the sign bit. However, 0x00007f would shorten to 0x7f. No extra byte + // is needed because the leftmost bit matches the sign bit. 
Similarly for + // negative values: 0xffff00 --> 0xff00 and 0xffff80 --> 0x80. + if ((length < key_size) && (is_negative ^ bool(data[length - 1] & std::byte{0x80}))) { ++length; } + + // Convert to big endian by reversing the range of nonzero bytes. Only those bytes are hashed. + __int128_t big_endian_value = 0; + auto big_endian_data = reinterpret_cast(&big_endian_value); + thrust::reverse_copy(thrust::seq, data, data + length, big_endian_data); + return this->compute_bytes(big_endian_data, length); } template <> @@ -480,7 +528,7 @@ hash_value_type __device__ inline SparkMurmurHash3_32::operat //---------- // Spark's byte by byte tail processing for (int i = nblocks * 4; i < len; i++) { - int32_t k1 = data[i]; + uint32_t k1 = data[i]; k1 *= c1; k1 = rotl32(k1, 15); k1 *= c2; diff --git a/cpp/include/cudf/dictionary/dictionary_column_view.hpp b/cpp/include/cudf/dictionary/dictionary_column_view.hpp index 42f8310040e..33e29e70304 100644 --- a/cpp/include/cudf/dictionary/dictionary_column_view.hpp +++ b/cpp/include/cudf/dictionary/dictionary_column_view.hpp @@ -59,33 +59,33 @@ class dictionary_column_view : private column_view { /** * @brief Returns the parent column. */ - column_view parent() const noexcept; + [[nodiscard]] column_view parent() const noexcept; /** * @brief Returns the column of indices */ - column_view indices() const noexcept; + [[nodiscard]] column_view indices() const noexcept; /** * @brief Returns a column_view combining the indices data * with offset, size, and nulls from the parent. */ - column_view get_indices_annotated() const noexcept; + [[nodiscard]] column_view get_indices_annotated() const noexcept; /** * @brief Returns the column of keys */ - column_view keys() const noexcept; + [[nodiscard]] column_view keys() const noexcept; /** * @brief Returns the `data_type` of the keys child column. 
*/ - data_type keys_type() const noexcept; + [[nodiscard]] data_type keys_type() const noexcept; /** * @brief Returns the number of rows in the keys column. */ - size_type keys_size() const noexcept; + [[nodiscard]] size_type keys_size() const noexcept; }; /** @} */ // end of group diff --git a/cpp/include/cudf/fixed_point/fixed_point.hpp b/cpp/include/cudf/fixed_point/fixed_point.hpp index 727dce0db9d..6a85428d8f0 100644 --- a/cpp/include/cudf/fixed_point/fixed_point.hpp +++ b/cpp/include/cudf/fixed_point/fixed_point.hpp @@ -82,7 +82,8 @@ template = 0 && "integer exponentiation with negative exponent is not possible."); - if (exponent == 0) return static_cast(1); + if (exponent == 0) { return static_cast(1); } + auto extra = static_cast(1); auto square = static_cast(Base); while (exponent > 1) { @@ -146,12 +147,9 @@ CUDF_HOST_DEVICE inline constexpr T left_shift(T const& val, scale_type const& s template CUDF_HOST_DEVICE inline constexpr T shift(T const& val, scale_type const& scale) { - if (scale == 0) - return val; - else if (scale > 0) - return right_shift(val, scale); - else - return left_shift(val, scale); + if (scale == 0) { return val; } + if (scale > 0) { return right_shift(val, scale); } + return left_shift(val, scale); } } // namespace detail @@ -193,7 +191,7 @@ struct scaled_integer { */ template class fixed_point { - Rep _value; + Rep _value{}; scale_type _scale; public: @@ -258,7 +256,7 @@ class fixed_point { * @brief Default constructor that constructs `fixed_point` number with a * value and scale of zero */ - CUDF_HOST_DEVICE inline fixed_point() : _value{0}, _scale{scale_type{0}} {} + CUDF_HOST_DEVICE inline fixed_point() : _scale{scale_type{0}} {} /** * @brief Explicit conversion operator for casting to floating point types @@ -543,7 +541,7 @@ class fixed_point { */ CUDF_HOST_DEVICE inline fixed_point rescaled(scale_type scale) const { - if (scale == _scale) return *this; + if (scale == _scale) { return *this; } Rep const value = 
detail::shift(_value, scale_type{scale - _scale}); return fixed_point{scaled_integer{value, scale}}; } @@ -563,10 +561,9 @@ class fixed_point { auto const sign = _value < 0 ? std::string("-") : std::string(); return sign + detail::to_string(av / n) + std::string(".") + zeros + detail::to_string(av % n); - } else { - auto const zeros = std::string(_scale, '0'); - return detail::to_string(_value) + zeros; } + auto const zeros = std::string(_scale, '0'); + return detail::to_string(_value) + zeros; } }; @@ -628,12 +625,9 @@ CUDF_HOST_DEVICE inline auto multiplication_overflow(T lhs, T rhs) { auto const min = cuda::std::numeric_limits::min(); auto const max = cuda::std::numeric_limits::max(); - if (rhs > 0) - return lhs > max / rhs || lhs < min / rhs; - else if (rhs < -1) - return lhs > min / rhs || lhs < max / rhs; - else - return rhs == -1 && lhs == min; + if (rhs > 0) { return lhs > max / rhs || lhs < min / rhs; } + if (rhs < -1) { return lhs > min / rhs || lhs < max / rhs; } + return rhs == -1 && lhs == min; } // PLUS Operation diff --git a/cpp/include/cudf/io/avro.hpp b/cpp/include/cudf/io/avro.hpp index 4e8bd65672f..0e00d14291d 100644 --- a/cpp/include/cudf/io/avro.hpp +++ b/cpp/include/cudf/io/avro.hpp @@ -74,22 +74,22 @@ class avro_reader_options { /** * @brief Returns source info. */ - source_info const& get_source() const { return _source; } + [[nodiscard]] source_info const& get_source() const { return _source; } /** * @brief Returns names of the columns to be read. */ - std::vector get_columns() const { return _columns; } + [[nodiscard]] std::vector get_columns() const { return _columns; } /** * @brief Returns number of rows to skip from the start. */ - size_type get_skip_rows() const { return _skip_rows; } + [[nodiscard]] size_type get_skip_rows() const { return _skip_rows; } /** * @brief Returns number of rows to read. 
*/ - size_type get_num_rows() const { return _num_rows; } + [[nodiscard]] size_type get_num_rows() const { return _num_rows; } /** * @brief Set names of the column to be read. diff --git a/cpp/include/cudf/io/csv.hpp b/cpp/include/cudf/io/csv.hpp index 89719cb7f67..44ede9b0d63 100644 --- a/cpp/include/cudf/io/csv.hpp +++ b/cpp/include/cudf/io/csv.hpp @@ -159,27 +159,27 @@ class csv_reader_options { /** * @brief Returns source info. */ - source_info const& get_source() const { return _source; } + [[nodiscard]] source_info const& get_source() const { return _source; } /** * @brief Returns compression format of the source. */ - compression_type get_compression() const { return _compression; } + [[nodiscard]] compression_type get_compression() const { return _compression; } /** * @brief Returns number of bytes to skip from source start. */ - std::size_t get_byte_range_offset() const { return _byte_range_offset; } + [[nodiscard]] std::size_t get_byte_range_offset() const { return _byte_range_offset; } /** * @brief Returns number of bytes to read. */ - std::size_t get_byte_range_size() const { return _byte_range_size; } + [[nodiscard]] std::size_t get_byte_range_size() const { return _byte_range_size; } /** * @brief Returns number of bytes to read with padding. */ - std::size_t get_byte_range_size_with_padding() const + [[nodiscard]] std::size_t get_byte_range_size_with_padding() const { if (_byte_range_size == 0) { return 0; @@ -191,7 +191,7 @@ class csv_reader_options { /** * @brief Returns number of bytes to pad when reading. */ - std::size_t get_byte_range_padding() const + [[nodiscard]] std::size_t get_byte_range_padding() const { auto const num_names = _names.size(); auto const num_dtypes = std::visit([](const auto& dtypes) { return dtypes.size(); }, _dtypes); @@ -213,127 +213,139 @@ class csv_reader_options { /** * @brief Returns names of the columns. 
*/ - std::vector const& get_names() const { return _names; } + [[nodiscard]] std::vector const& get_names() const { return _names; } /** * @brief Returns prefix to be used for column ID. */ - std::string get_prefix() const { return _prefix; } + [[nodiscard]] std::string get_prefix() const { return _prefix; } /** * @brief Whether to rename duplicate column names. */ - bool is_enabled_mangle_dupe_cols() const { return _mangle_dupe_cols; } + [[nodiscard]] bool is_enabled_mangle_dupe_cols() const { return _mangle_dupe_cols; } /** * @brief Returns names of the columns to be read. */ - std::vector const& get_use_cols_names() const { return _use_cols_names; } + [[nodiscard]] std::vector const& get_use_cols_names() const + { + return _use_cols_names; + } /** * @brief Returns indexes of columns to read. */ - std::vector const& get_use_cols_indexes() const { return _use_cols_indexes; } + [[nodiscard]] std::vector const& get_use_cols_indexes() const { return _use_cols_indexes; } /** * @brief Returns number of rows to read. */ - size_type get_nrows() const { return _nrows; } + [[nodiscard]] size_type get_nrows() const { return _nrows; } /** * @brief Returns number of rows to skip from start. */ - size_type get_skiprows() const { return _skiprows; } + [[nodiscard]] size_type get_skiprows() const { return _skiprows; } /** * @brief Returns number of rows to skip from end. */ - size_type get_skipfooter() const { return _skipfooter; } + [[nodiscard]] size_type get_skipfooter() const { return _skipfooter; } /** * @brief Returns header row index. */ - size_type get_header() const { return _header; } + [[nodiscard]] size_type get_header() const { return _header; } /** * @brief Returns line terminator. */ - char get_lineterminator() const { return _lineterminator; } + [[nodiscard]] char get_lineterminator() const { return _lineterminator; } /** * @brief Returns field delimiter. 
*/ - char get_delimiter() const { return _delimiter; } + [[nodiscard]] char get_delimiter() const { return _delimiter; } /** * @brief Returns numeric data thousands separator. */ - char get_thousands() const { return _thousands; } + [[nodiscard]] char get_thousands() const { return _thousands; } /** * @brief Returns decimal point character. */ - char get_decimal() const { return _decimal; } + [[nodiscard]] char get_decimal() const { return _decimal; } /** * @brief Returns comment line start character. */ - char get_comment() const { return _comment; } + [[nodiscard]] char get_comment() const { return _comment; } /** * @brief Whether to treat `\r\n` as line terminator. */ - bool is_enabled_windowslinetermination() const { return _windowslinetermination; } + [[nodiscard]] bool is_enabled_windowslinetermination() const { return _windowslinetermination; } /** * @brief Whether to treat whitespace as field delimiter. */ - bool is_enabled_delim_whitespace() const { return _delim_whitespace; } + [[nodiscard]] bool is_enabled_delim_whitespace() const { return _delim_whitespace; } /** * @brief Whether to skip whitespace after the delimiter. */ - bool is_enabled_skipinitialspace() const { return _skipinitialspace; } + [[nodiscard]] bool is_enabled_skipinitialspace() const { return _skipinitialspace; } /** * @brief Whether to ignore empty lines or parse line values as invalid. */ - bool is_enabled_skip_blank_lines() const { return _skip_blank_lines; } + [[nodiscard]] bool is_enabled_skip_blank_lines() const { return _skip_blank_lines; } /** * @brief Returns quoting style. */ - quote_style get_quoting() const { return _quoting; } + [[nodiscard]] quote_style get_quoting() const { return _quoting; } /** * @brief Returns quoting character. */ - char get_quotechar() const { return _quotechar; } + [[nodiscard]] char get_quotechar() const { return _quotechar; } /** * @brief Whether a quote inside a value is double-quoted. 
*/ - bool is_enabled_doublequote() const { return _doublequote; } + [[nodiscard]] bool is_enabled_doublequote() const { return _doublequote; } /** * @brief Returns names of columns to read as datetime. */ - std::vector const& get_parse_dates_names() const { return _parse_dates_names; } + [[nodiscard]] std::vector const& get_parse_dates_names() const + { + return _parse_dates_names; + } /** * @brief Returns indexes of columns to read as datetime. */ - std::vector const& get_parse_dates_indexes() const { return _parse_dates_indexes; } + [[nodiscard]] std::vector const& get_parse_dates_indexes() const + { + return _parse_dates_indexes; + } /** * @brief Returns names of columns to read as hexadecimal. */ - std::vector const& get_parse_hex_names() const { return _parse_hex_names; } + [[nodiscard]] std::vector const& get_parse_hex_names() const + { + return _parse_hex_names; + } /** * @brief Returns indexes of columns to read as hexadecimal. */ - std::vector const& get_parse_hex_indexes() const { return _parse_hex_indexes; } + [[nodiscard]] std::vector const& get_parse_hex_indexes() const { return _parse_hex_indexes; } /** * @brief Returns per-column types. @@ -1277,52 +1289,52 @@ class csv_writer_options { /** * @brief Returns sink used for writer output. */ - sink_info const& get_sink(void) const { return _sink; } + [[nodiscard]] sink_info const& get_sink() const { return _sink; } /** * @brief Returns table that would be written to output. */ - table_view const& get_table(void) const { return _table; } + [[nodiscard]] table_view const& get_table() const { return _table; } /** * @brief Returns optional associated metadata. */ - table_metadata const* get_metadata(void) const { return _metadata; } + [[nodiscard]] table_metadata const* get_metadata() const { return _metadata; } /** * @brief Returns string to used for null entries. 
*/ - std::string get_na_rep(void) const { return _na_rep; } + [[nodiscard]] std::string get_na_rep() const { return _na_rep; } /** * @brief Whether to write headers to csv. */ - bool is_enabled_include_header(void) const { return _include_header; } + [[nodiscard]] bool is_enabled_include_header() const { return _include_header; } /** * @brief Returns maximum number of rows to process for each file write. */ - size_type get_rows_per_chunk(void) const { return _rows_per_chunk; } + [[nodiscard]] size_type get_rows_per_chunk() const { return _rows_per_chunk; } /** * @brief Returns character used for separating lines. */ - std::string get_line_terminator(void) const { return _line_terminator; } + [[nodiscard]] std::string get_line_terminator() const { return _line_terminator; } /** * @brief Returns character used for separating lines. */ - char get_inter_column_delimiter(void) const { return _inter_column_delimiter; } + [[nodiscard]] char get_inter_column_delimiter() const { return _inter_column_delimiter; } /** * @brief Returns string used for values != 0 in INT8 types. */ - std::string get_true_value(void) const { return _true_value; } + [[nodiscard]] std::string get_true_value() const { return _true_value; } /** * @brief Returns string used for values == 0 in INT8 types. */ - std::string get_false_value(void) const { return _false_value; } + [[nodiscard]] std::string get_false_value() const { return _false_value; } // Setter /** diff --git a/cpp/include/cudf/io/data_sink.hpp b/cpp/include/cudf/io/data_sink.hpp index 2c1966ee6ba..6d4c8ec9b8c 100644 --- a/cpp/include/cudf/io/data_sink.hpp +++ b/cpp/include/cudf/io/data_sink.hpp @@ -120,7 +120,7 @@ class data_sink { * * @return bool If this writer supports device_write() calls. */ - virtual bool supports_device_write() const { return false; } + [[nodiscard]] virtual bool supports_device_write() const { return false; } /** * @brief Estimates whether a direct device write would be more optimal for the given size. 
@@ -128,7 +128,10 @@ class data_sink { * @param size Number of bytes to write * @return whether the device write is expected to be more performant for the given size */ - virtual bool is_device_write_preferred(size_t size) const { return supports_device_write(); } + [[nodiscard]] virtual bool is_device_write_preferred(size_t size) const + { + return supports_device_write(); + } /** * @brief Append the buffer content to the sink from a gpu address diff --git a/cpp/include/cudf/io/datasource.hpp b/cpp/include/cudf/io/datasource.hpp index 627ec29a496..18ab8aad088 100644 --- a/cpp/include/cudf/io/datasource.hpp +++ b/cpp/include/cudf/io/datasource.hpp @@ -52,12 +52,12 @@ class datasource { /** * @brief Returns the buffer size in bytes. */ - virtual size_t size() const = 0; + [[nodiscard]] virtual size_t size() const = 0; /** * @brief Returns the address of the data in the buffer. */ - virtual uint8_t const* data() const = 0; + [[nodiscard]] virtual uint8_t const* data() const = 0; /** * @brief Base class destructor @@ -155,7 +155,7 @@ class datasource { * * @return bool Whether this source supports device_read() calls */ - virtual bool supports_device_read() const { return false; } + [[nodiscard]] virtual bool supports_device_read() const { return false; } /** * @brief Estimates whether a direct device read would be more optimal for the given size. @@ -163,7 +163,10 @@ class datasource { * @param size Number of bytes to read * @return whether the device read is expected to be more performant for the given size */ - virtual bool is_device_read_preferred(size_t size) const { return supports_device_read(); } + [[nodiscard]] virtual bool is_device_read_preferred(size_t size) const + { + return supports_device_read(); + } /** * @brief Returns a device buffer with a subset of data from the source. 
@@ -243,31 +246,31 @@ class datasource { * * @return size_t The size of the source data in bytes */ - virtual size_t size() const = 0; + [[nodiscard]] virtual size_t size() const = 0; /** * @brief Returns whether the source contains any data. * * @return bool True if there is data, False otherwise */ - virtual bool is_empty() const { return size() == 0; } + [[nodiscard]] virtual bool is_empty() const { return size() == 0; } /** * @brief Implementation for non owning buffer where datasource holds buffer until destruction. */ class non_owning_buffer : public buffer { public: - non_owning_buffer() : _data(0), _size(0) {} + non_owning_buffer() {} non_owning_buffer(uint8_t* data, size_t size) : _data(data), _size(size) {} - size_t size() const override { return _size; } + [[nodiscard]] size_t size() const override { return _size; } - uint8_t const* data() const override { return _data; } + [[nodiscard]] uint8_t const* data() const override { return _data; } private: - uint8_t* const _data; - size_t const _size; + uint8_t* const _data{nullptr}; + size_t const _size{0}; }; /** @@ -297,9 +300,12 @@ class datasource { { } - size_t size() const override { return _size; } + [[nodiscard]] size_t size() const override { return _size; } - uint8_t const* data() const override { return static_cast(_data_ptr); } + [[nodiscard]] uint8_t const* data() const override + { + return static_cast(_data_ptr); + } private: Container _data; @@ -330,8 +336,8 @@ class arrow_io_source : public datasource { : arrow_buffer(arrow_buffer) { } - size_t size() const override { return arrow_buffer->size(); } - uint8_t const* data() const override { return arrow_buffer->data(); } + [[nodiscard]] size_t size() const override { return arrow_buffer->size(); } + [[nodiscard]] uint8_t const* data() const override { return arrow_buffer->data(); } }; public: @@ -393,7 +399,7 @@ class arrow_io_source : public datasource { /** * @brief Returns the size of the data in the `arrow` source. 
*/ - size_t size() const override + [[nodiscard]] size_t size() const override { auto result = arrow_file->GetSize(); CUDF_EXPECTS(result.ok(), "Cannot get file size"); diff --git a/cpp/include/cudf/io/json.hpp b/cpp/include/cudf/io/json.hpp index 5f34803f28e..727c24a4431 100644 --- a/cpp/include/cudf/io/json.hpp +++ b/cpp/include/cudf/io/json.hpp @@ -111,7 +111,7 @@ class json_reader_options { /** * @brief Returns source info. */ - source_info const& get_source() const { return _source; } + [[nodiscard]] source_info const& get_source() const { return _source; } /** * @brief Returns data types of the columns. diff --git a/cpp/include/cudf/io/orc.hpp b/cpp/include/cudf/io/orc.hpp index b3a2f6bcbbb..108251dd646 100644 --- a/cpp/include/cudf/io/orc.hpp +++ b/cpp/include/cudf/io/orc.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -102,12 +102,12 @@ class orc_reader_options { /** * @brief Returns source info. */ - source_info const& get_source() const { return _source; } + [[nodiscard]] source_info const& get_source() const { return _source; } /** * @brief Returns names of the columns to read. */ - std::vector const& get_columns() const { return _columns; } + [[nodiscard]] std::vector const& get_columns() const { return _columns; } /** * @brief Returns vector of vectors, stripes to read for each input source @@ -434,6 +434,18 @@ table_with_metadata read_orc( */ class orc_writer_options_builder; +/** + * @brief Constants to disambiguate statistics terminology for ORC. + * + * ORC refers to its finest granularity of row-grouping as "row group", + * which corresponds to Parquet "pages". + * Similarly, ORC's "stripe" corresponds to a Parquet "row group". + * The following constants disambiguate the terminology for the statistics + * collected at each level. 
+ */ +static constexpr statistics_freq ORC_STATISTICS_STRIPE = statistics_freq::STATISTICS_ROWGROUP; +static constexpr statistics_freq ORC_STATISTICS_ROW_GROUP = statistics_freq::STATISTICS_PAGE; + /** * @brief Settings to use for `write_orc()`. */ @@ -442,8 +454,8 @@ class orc_writer_options { sink_info _sink; // Specify the compression format to use compression_type _compression = compression_type::AUTO; - // Enable writing column statistics - bool _enable_statistics = true; + // Specify frequency of statistics collection + statistics_freq _stats_freq = ORC_STATISTICS_ROW_GROUP; // Maximum size of each stripe (unless smaller than a single row group) size_t _stripe_size_bytes = default_stripe_size_bytes; // Maximum number of rows in stripe (unless smaller than a single row group) @@ -491,27 +503,35 @@ class orc_writer_options { /** * @brief Returns sink info. */ - sink_info const& get_sink() const { return _sink; } + [[nodiscard]] sink_info const& get_sink() const { return _sink; } /** * @brief Returns compression type. */ - compression_type get_compression() const { return _compression; } + [[nodiscard]] compression_type get_compression() const { return _compression; } /** * @brief Whether writing column statistics is enabled/disabled. */ - bool is_enabled_statistics() const { return _enable_statistics; } + [[nodiscard]] bool is_enabled_statistics() const + { + return _stats_freq != statistics_freq::STATISTICS_NONE; + } + + /** + * @brief Returns frequency of statistics collection. + */ + [[nodiscard]] statistics_freq get_statistics_freq() const { return _stats_freq; } /** * @brief Returns maximum stripe size, in bytes. */ - auto get_stripe_size_bytes() const { return _stripe_size_bytes; } + [[nodiscard]] auto get_stripe_size_bytes() const { return _stripe_size_bytes; } /** * @brief Returns maximum stripe size, in rows. 
*/ - auto get_stripe_size_rows() const { return _stripe_size_rows; } + [[nodiscard]] auto get_stripe_size_rows() const { return _stripe_size_rows; } /** * @brief Returns the row index stride. @@ -525,17 +545,20 @@ class orc_writer_options { /** * @brief Returns table to be written to output. */ - table_view get_table() const { return _table; } + [[nodiscard]] table_view get_table() const { return _table; } /** * @brief Returns associated metadata. */ - table_input_metadata const* get_metadata() const { return _metadata; } + [[nodiscard]] table_input_metadata const* get_metadata() const { return _metadata; } /** * @brief Returns Key-Value footer metadata information. */ - std::map const& get_key_value_metadata() const { return _user_data; } + [[nodiscard]] std::map const& get_key_value_metadata() const + { + return _user_data; + } // Setters @@ -547,11 +570,16 @@ class orc_writer_options { void set_compression(compression_type comp) { _compression = comp; } /** - * @brief Enable/Disable writing column statistics. + * @brief Choose granularity of statistics collection. + * + * The granularity can be set to: + * - cudf::io::STATISTICS_NONE: No statistics are collected. + * - cudf::io::ORC_STATISTICS_STRIPE: Statistics are collected for each ORC stripe. + * - cudf::io::ORC_STATISTICS_ROWGROUP: Statistics are collected for each ORC row group. * - * @param val Boolean value to enable/disable statistics. + * @param val Frequency of statistics collection. */ - void enable_statistics(bool val) { _enable_statistics = val; } + void enable_statistics(statistics_freq val) { _stats_freq = val; } /** * @brief Sets the maximum stripe size, in bytes. @@ -644,14 +672,19 @@ class orc_writer_options_builder { } /** - * @brief Enable/Disable writing column statistics. + * @brief Choose granularity of column statistics to be written * - * @param val Boolean value to enable/disable. + * The granularity can be set to: + * - cudf::io::STATISTICS_NONE: No statistics are collected. 
+ * - cudf::io::ORC_STATISTICS_STRIPE: Statistics are collected for each ORC stripe. + * - cudf::io::ORC_STATISTICS_ROWGROUP: Statistics are collected for each ORC row group. + * + * @param val Level of statistics collection. * @return this for chaining. */ - orc_writer_options_builder& enable_statistics(bool val) + orc_writer_options_builder& enable_statistics(statistics_freq val) { - options._enable_statistics = val; + options._stats_freq = val; return *this; } @@ -772,8 +805,8 @@ class chunked_orc_writer_options { sink_info _sink; // Specify the compression format to use compression_type _compression = compression_type::AUTO; - // Enable writing column statistics - bool _enable_statistics = true; + // Specify granularity of statistics collection + statistics_freq _stats_freq = ORC_STATISTICS_ROW_GROUP; // Maximum size of each stripe (unless smaller than a single row group) size_t _stripe_size_bytes = default_stripe_size_bytes; // Maximum number of rows in stripe (unless smaller than a single row group) @@ -814,27 +847,27 @@ class chunked_orc_writer_options { /** * @brief Returns sink info. */ - sink_info const& get_sink() const { return _sink; } + [[nodiscard]] sink_info const& get_sink() const { return _sink; } /** * @brief Returns compression type. */ - compression_type get_compression() const { return _compression; } + [[nodiscard]] compression_type get_compression() const { return _compression; } /** - * @brief Whether writing column statistics is enabled/disabled. + * @brief Returns granularity of statistics collection. */ - bool is_enabled_statistics() const { return _enable_statistics; } + [[nodiscard]] statistics_freq get_statistics_freq() const { return _stats_freq; } /** * @brief Returns maximum stripe size, in bytes. */ - auto get_stripe_size_bytes() const { return _stripe_size_bytes; } + [[nodiscard]] auto get_stripe_size_bytes() const { return _stripe_size_bytes; } /** * @brief Returns maximum stripe size, in rows. 
*/ - auto get_stripe_size_rows() const { return _stripe_size_rows; } + [[nodiscard]] auto get_stripe_size_rows() const { return _stripe_size_rows; } /** * @brief Returns the row index stride. @@ -848,12 +881,15 @@ class chunked_orc_writer_options { /** * @brief Returns associated metadata. */ - table_input_metadata const* get_metadata() const { return _metadata; } + [[nodiscard]] table_input_metadata const* get_metadata() const { return _metadata; } /** * @brief Returns Key-Value footer metadata information. */ - std::map const& get_key_value_metadata() const { return _user_data; } + [[nodiscard]] std::map const& get_key_value_metadata() const + { + return _user_data; + } // Setters @@ -865,11 +901,16 @@ class chunked_orc_writer_options { void set_compression(compression_type comp) { _compression = comp; } /** - * @brief Enable/Disable writing column statistics. + * @brief Choose granularity of statistics collection * - * @param val Boolean value to enable/disable. + * The granularity can be set to: + * - cudf::io::STATISTICS_NONE: No statistics are collected. + * - cudf::io::ORC_STATISTICS_STRIPE: Statistics are collected for each ORC stripe. + * - cudf::io::ORC_STATISTICS_ROWGROUP: Statistics are collected for each ORC row group. + * + * @param val Frequency of statistics collection. */ - void enable_statistics(bool val) { _enable_statistics = val; } + void enable_statistics(statistics_freq val) { _stats_freq = val; } /** * @brief Sets the maximum stripe size, in bytes. @@ -952,14 +993,19 @@ class chunked_orc_writer_options_builder { } /** - * @brief Enable/Disable writing column statistics. + * @brief Choose granularity of statistics collection + * + * The granularity can be set to: + * - cudf::io::STATISTICS_NONE: No statistics are collected. + * - cudf::io::ORC_STATISTICS_STRIPE: Statistics are collected for each ORC stripe. + * - cudf::io::ORC_STATISTICS_ROWGROUP: Statistics are collected for each ORC row group. 
* - * @param val Boolean value to enable/disable. + * @param val Frequency of statistics collection. * @return this for chaining. */ - chunked_orc_writer_options_builder& enable_statistics(bool val) + chunked_orc_writer_options_builder& enable_statistics(statistics_freq val) { - options._enable_statistics = val; + options._stats_freq = val; return *this; } diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index 740f7a8b2db..2ceac947c8d 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -96,33 +96,36 @@ class parquet_reader_options { /** * @brief Returns source info. */ - source_info const& get_source() const { return _source; } + [[nodiscard]] source_info const& get_source() const { return _source; } /** * @brief Returns true/false depending on whether strings should be converted to categories or * not. */ - bool is_enabled_convert_strings_to_categories() const { return _convert_strings_to_categories; } + [[nodiscard]] bool is_enabled_convert_strings_to_categories() const + { + return _convert_strings_to_categories; + } /** * @brief Returns true/false depending whether to use pandas metadata or not while reading. */ - bool is_enabled_use_pandas_metadata() const { return _use_pandas_metadata; } + [[nodiscard]] bool is_enabled_use_pandas_metadata() const { return _use_pandas_metadata; } /** * @brief Returns number of rows to skip from the start. */ - size_type get_skip_rows() const { return _skip_rows; } + [[nodiscard]] size_type get_skip_rows() const { return _skip_rows; } /** * @brief Returns number of rows to read. */ - size_type get_num_rows() const { return _num_rows; } + [[nodiscard]] size_type get_num_rows() const { return _num_rows; } /** * @brief Returns names of column to be read. */ - std::vector const& get_columns() const { return _columns; } + [[nodiscard]] std::vector const& get_columns() const { return _columns; } /** * @brief Returns list of individual row groups to be read. 
@@ -421,32 +424,32 @@ class parquet_writer_options { /** * @brief Returns sink info. */ - sink_info const& get_sink() const { return _sink; } + [[nodiscard]] sink_info const& get_sink() const { return _sink; } /** * @brief Returns compression format used. */ - compression_type get_compression() const { return _compression; } + [[nodiscard]] compression_type get_compression() const { return _compression; } /** * @brief Returns level of statistics requested in output file. */ - statistics_freq get_stats_level() const { return _stats_level; } + [[nodiscard]] statistics_freq get_stats_level() const { return _stats_level; } /** * @brief Returns table_view. */ - table_view get_table() const { return _table; } + [[nodiscard]] table_view get_table() const { return _table; } /** * @brief Returns partitions. */ - std::vector const& get_partitions() const { return _partitions; } + [[nodiscard]] std::vector const& get_partitions() const { return _partitions; } /** * @brief Returns associated metadata. */ - table_input_metadata const* get_metadata() const { return _metadata; } + [[nodiscard]] table_input_metadata const* get_metadata() const { return _metadata; } /** * @brief Returns Key-Value footer metadata information. @@ -801,22 +804,22 @@ class chunked_parquet_writer_options { /** * @brief Returns sink info. */ - sink_info const& get_sink() const { return _sink; } + [[nodiscard]] sink_info const& get_sink() const { return _sink; } /** * @brief Returns compression format used. */ - compression_type get_compression() const { return _compression; } + [[nodiscard]] compression_type get_compression() const { return _compression; } /** * @brief Returns level of statistics requested in output file. */ - statistics_freq get_stats_level() const { return _stats_level; } + [[nodiscard]] statistics_freq get_stats_level() const { return _stats_level; } /** * @brief Returns metadata information. 
*/ - table_input_metadata const* get_metadata() const { return _metadata; } + [[nodiscard]] table_input_metadata const* get_metadata() const { return _metadata; } /** * @brief Returns Key-Value footer metadata information. diff --git a/cpp/include/cudf/io/text/data_chunk_source.hpp b/cpp/include/cudf/io/text/data_chunk_source.hpp index e65afa04fe5..5e6dda5a514 100644 --- a/cpp/include/cudf/io/text/data_chunk_source.hpp +++ b/cpp/include/cudf/io/text/data_chunk_source.hpp @@ -36,8 +36,8 @@ namespace text { */ class device_data_chunk { public: - virtual char const* data() const = 0; - virtual std::size_t size() const = 0; + [[nodiscard]] virtual char const* data() const = 0; + [[nodiscard]] virtual std::size_t size() const = 0; virtual operator device_span() const = 0; }; @@ -76,7 +76,7 @@ class data_chunk_reader { */ class data_chunk_source { public: - virtual std::unique_ptr create_reader() const = 0; + [[nodiscard]] virtual std::unique_ptr create_reader() const = 0; }; } // namespace text diff --git a/cpp/include/cudf/io/text/data_chunk_source_factories.hpp b/cpp/include/cudf/io/text/data_chunk_source_factories.hpp index 6b95de53ee7..aeb4b7fff53 100644 --- a/cpp/include/cudf/io/text/data_chunk_source_factories.hpp +++ b/cpp/include/cudf/io/text/data_chunk_source_factories.hpp @@ -41,8 +41,8 @@ class device_span_data_chunk : public device_data_chunk { public: device_span_data_chunk(device_span data) : _data(data) {} - char const* data() const override { return _data.data(); } - std::size_t size() const override { return _data.size(); } + [[nodiscard]] char const* data() const override { return _data.data(); } + [[nodiscard]] std::size_t size() const override { return _data.size(); } operator device_span() const override { return _data; } private: @@ -53,8 +53,8 @@ class device_uvector_data_chunk : public device_data_chunk { public: device_uvector_data_chunk(rmm::device_uvector&& data) : _data(std::move(data)) {} - char const* data() const override { return 
_data.data(); } - std::size_t size() const override { return _data.size(); } + [[nodiscard]] char const* data() const override { return _data.data(); } + [[nodiscard]] std::size_t size() const override { return _data.size(); } operator device_span() const override { return _data; } private: @@ -171,7 +171,7 @@ class device_span_data_chunk_reader : public data_chunk_reader { class file_data_chunk_source : public data_chunk_source { public: file_data_chunk_source(std::string filename) : _filename(filename) {} - std::unique_ptr create_reader() const override + [[nodiscard]] std::unique_ptr create_reader() const override { return std::make_unique( std::make_unique(_filename, std::ifstream::in)); @@ -187,7 +187,7 @@ class file_data_chunk_source : public data_chunk_source { class string_data_chunk_source : public data_chunk_source { public: string_data_chunk_source(std::string const& data) : _data(data) {} - std::unique_ptr create_reader() const override + [[nodiscard]] std::unique_ptr create_reader() const override { return std::make_unique(std::make_unique(_data)); } @@ -202,7 +202,7 @@ class string_data_chunk_source : public data_chunk_source { class device_span_data_chunk_source : public data_chunk_source { public: device_span_data_chunk_source(device_span data) : _data(data) {} - std::unique_ptr create_reader() const override + [[nodiscard]] std::unique_ptr create_reader() const override { return std::make_unique(_data); } diff --git a/cpp/include/cudf/io/text/detail/multistate.hpp b/cpp/include/cudf/io/text/detail/multistate.hpp index d3c8909ab51..e7136ac69a5 100644 --- a/cpp/include/cudf/io/text/detail/multistate.hpp +++ b/cpp/include/cudf/io/text/detail/multistate.hpp @@ -37,7 +37,7 @@ struct multistate { * @brief Creates a segment which represents (0, 0] */ - constexpr multistate_segment() : _data(0) {} + constexpr multistate_segment() = default; /** * @brief Creates a segment which represents (head, tail] * @@ -52,15 +52,15 @@ struct multistate { /** * @brief 
Get's the (head, ____] value from the segment. */ - constexpr uint8_t get_head() const { return _data & 0b1111; } + [[nodiscard]] constexpr uint8_t get_head() const { return _data & 0b1111; } /** * @brief Get's the (____, tail] value from the segment. */ - constexpr uint8_t get_tail() const { return _data >> 4; } + [[nodiscard]] constexpr uint8_t get_tail() const { return _data >> 4; } private: - uint8_t _data; + uint8_t _data{0}; }; public: @@ -87,12 +87,12 @@ struct multistate { /** * @brief get's the number of segments this multistate represents */ - constexpr uint8_t size() const { return _size; } + [[nodiscard]] constexpr uint8_t size() const { return _size; } /** * @brief get's the highest (____, tail] value this multistate represents */ - constexpr uint8_t max_tail() const + [[nodiscard]] constexpr uint8_t max_tail() const { uint8_t maximum = 0; @@ -106,12 +106,12 @@ struct multistate { /** * @brief get's the Nth (head, ____] value state this multistate represents */ - constexpr uint8_t get_head(uint8_t idx) const { return _segments[idx].get_head(); } + [[nodiscard]] constexpr uint8_t get_head(uint8_t idx) const { return _segments[idx].get_head(); } /** * @brief get's the Nth (____, tail] value state this multistate represents */ - constexpr uint8_t get_tail(uint8_t idx) const { return _segments[idx].get_tail(); } + [[nodiscard]] constexpr uint8_t get_tail(uint8_t idx) const { return _segments[idx].get_tail(); } private: uint8_t _size = 0; diff --git a/cpp/include/cudf/io/text/detail/trie.hpp b/cpp/include/cudf/io/text/detail/trie.hpp index d14fe15b0a9..06d15276a68 100644 --- a/cpp/include/cudf/io/text/detail/trie.hpp +++ b/cpp/include/cudf/io/text/detail/trie.hpp @@ -161,13 +161,13 @@ struct trie { /** * @brief Gets the number of nodes contained in this trie. */ - cudf::size_type size() const { return _nodes.size(); } + [[nodiscard]] cudf::size_type size() const { return _nodes.size(); } /** * @brief A pessimistic count of duplicate tokens in the trie. 
Used to determine the maximum * possible stack size required to compute matches of this trie in parallel. */ - cudf::size_type max_duplicate_tokens() const { return _max_duplicate_tokens; } + [[nodiscard]] cudf::size_type max_duplicate_tokens() const { return _max_duplicate_tokens; } /** * @brief Create a trie which represents the given pattern. @@ -255,7 +255,7 @@ struct trie { cudf::detail::make_device_uvector_sync(trie_nodes, stream, mr)}; } - trie_device_view view() const { return trie_device_view{_nodes}; } + [[nodiscard]] trie_device_view view() const { return trie_device_view{_nodes}; } }; } // namespace detail diff --git a/cpp/include/cudf/io/types.hpp b/cpp/include/cudf/io/types.hpp index 8f06de99f05..7e4ab5b8d9d 100644 --- a/cpp/include/cudf/io/types.hpp +++ b/cpp/include/cudf/io/types.hpp @@ -155,14 +155,8 @@ struct source_info { source_info() = default; - explicit source_info(std::vector const& file_paths) - : _type(io_type::FILEPATH), _filepaths(file_paths) - { - } - explicit source_info(std::string const& file_path) - : _type(io_type::FILEPATH), _filepaths({file_path}) - { - } + explicit source_info(std::vector const& file_paths) : _filepaths(file_paths) {} + explicit source_info(std::string const& file_path) : _filepaths({file_path}) {} explicit source_info(std::vector const& host_buffers) : _type(io_type::HOST_BUFFER), _buffers(host_buffers) @@ -182,11 +176,11 @@ struct source_info { { } - auto type() const { return _type; } - auto const& filepaths() const { return _filepaths; } - auto const& buffers() const { return _buffers; } - auto const& files() const { return _files; } - auto const& user_sources() const { return _user_sources; } + [[nodiscard]] auto type() const { return _type; } + [[nodiscard]] auto const& filepaths() const { return _filepaths; } + [[nodiscard]] auto const& buffers() const { return _buffers; } + [[nodiscard]] auto const& files() const { return _files; } + [[nodiscard]] auto const& user_sources() const { return _user_sources; 
} private: io_type _type = io_type::FILEPATH; @@ -200,7 +194,7 @@ struct source_info { */ struct sink_info { sink_info() = default; - sink_info(size_t num_sinks) : _type(io_type::VOID), _num_sinks(num_sinks) {} + sink_info(size_t num_sinks) : _num_sinks(num_sinks) {} explicit sink_info(std::vector const& file_paths) : _type(io_type::FILEPATH), _num_sinks(file_paths.size()), _filepaths(file_paths) @@ -226,11 +220,11 @@ struct sink_info { { } - auto type() const { return _type; } - auto num_sinks() const { return _num_sinks; } - auto const& filepaths() const { return _filepaths; } - auto const& buffers() const { return _buffers; } - auto const& user_sinks() const { return _user_sinks; } + [[nodiscard]] auto type() const { return _type; } + [[nodiscard]] auto num_sinks() const { return _num_sinks; } + [[nodiscard]] auto const& filepaths() const { return _filepaths; } + [[nodiscard]] auto const& buffers() const { return _buffers; } + [[nodiscard]] auto const& user_sinks() const { return _user_sinks; } private: io_type _type = io_type::VOID; @@ -344,51 +338,51 @@ class column_in_metadata { * @param i Index of the child to get * @return this for chaining */ - column_in_metadata const& child(size_type i) const { return children[i]; } + [[nodiscard]] column_in_metadata const& child(size_type i) const { return children[i]; } /** * @brief Get the name of this column */ - std::string get_name() const { return _name; } + [[nodiscard]] std::string get_name() const { return _name; } /** * @brief Get whether nullability has been explicitly set for this column. */ - bool is_nullability_defined() const { return _nullable.has_value(); } + [[nodiscard]] bool is_nullability_defined() const { return _nullable.has_value(); } /** * @brief Gets the explicitly set nullability for this column. * @throws If nullability is not explicitly defined for this column. * Check using `is_nullability_defined()` first. 
*/ - bool nullable() const { return _nullable.value(); } + [[nodiscard]] bool nullable() const { return _nullable.value(); } /** * @brief If this is the metadata of a list column, returns whether it is to be encoded as a map. */ - bool is_map() const { return _list_column_is_map; } + [[nodiscard]] bool is_map() const { return _list_column_is_map; } /** * @brief Get whether to encode this timestamp column using deprecated int96 physical type */ - bool is_enabled_int96_timestamps() const { return _use_int96_timestamp; } + [[nodiscard]] bool is_enabled_int96_timestamps() const { return _use_int96_timestamp; } /** * @brief Get whether precision has been set for this decimal column */ - bool is_decimal_precision_set() const { return _decimal_precision.has_value(); } + [[nodiscard]] bool is_decimal_precision_set() const { return _decimal_precision.has_value(); } /** * @brief Get the decimal precision that was set for this column. * @throws If decimal precision was not set for this column. * Check using `is_decimal_precision_set()` first. */ - uint8_t get_decimal_precision() const { return _decimal_precision.value(); } + [[nodiscard]] uint8_t get_decimal_precision() const { return _decimal_precision.value(); } /** * @brief Get the number of children of this column */ - size_type num_children() const { return children.size(); } + [[nodiscard]] size_type num_children() const { return children.size(); } }; class table_input_metadata { diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp index 30400074c50..f6efea5f2bb 100644 --- a/cpp/include/cudf/join.hpp +++ b/cpp/include/cudf/join.hpp @@ -607,9 +607,10 @@ class hash_join { * @return The exact number of output when performing an inner join between two tables with * `build` and `probe` as the the join keys . 
*/ - std::size_t inner_join_size(cudf::table_view const& probe, - null_equality compare_nulls = null_equality::EQUAL, - rmm::cuda_stream_view stream = rmm::cuda_stream_default) const; + [[nodiscard]] std::size_t inner_join_size( + cudf::table_view const& probe, + null_equality compare_nulls = null_equality::EQUAL, + rmm::cuda_stream_view stream = rmm::cuda_stream_default) const; /** * Returns the exact number of matches (rows) when performing a left join with the specified probe @@ -622,9 +623,10 @@ class hash_join { * @return The exact number of output when performing a left join between two tables with `build` * and `probe` as the the join keys . */ - std::size_t left_join_size(cudf::table_view const& probe, - null_equality compare_nulls = null_equality::EQUAL, - rmm::cuda_stream_view stream = rmm::cuda_stream_default) const; + [[nodiscard]] std::size_t left_join_size( + cudf::table_view const& probe, + null_equality compare_nulls = null_equality::EQUAL, + rmm::cuda_stream_view stream = rmm::cuda_stream_default) const; /** * Returns the exact number of matches (rows) when performing a full join with the specified probe @@ -1037,6 +1039,109 @@ mixed_full_join( std::optional>> output_size_data = {}, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Returns an index vector corresponding to all rows in the left tables + * where the columns of the equality table are equal and the predicate + * evaluates to true on the conditional tables. + * + * If the provided predicate returns NULL for a pair of rows (left, right), the + * left row is not included in the output. It is the user's responsibility to + * choose a suitable compare_nulls value AND use appropriate null-safe + * operators in the expression. + * + * If the provided output size or per-row counts are incorrect, behavior is undefined.
+ * + * @code{.pseudo} + * left_equality: {{0, 1, 2}} + * right_equality: {{1, 2, 3}} + * left_conditional: {{4, 4, 4}} + * right_conditional: {{3, 4, 5}} + * Expression: Left.Column_0 > Right.Column_0 + * Result: {1} + * @endcode + * + * @throw cudf::logic_error If the binary predicate outputs a non-boolean result. + * @throw cudf::logic_error If the number of rows in left_equality and left_conditional do not + * match. + * @throw cudf::logic_error If the number of rows in right_equality and right_conditional do not + * match. + * + * @param left_equality The left table used for the equality join. + * @param right_equality The right table used for the equality join. + * @param left_conditional The left table used for the conditional join. + * @param right_conditional The right table used for the conditional join. + * @param binary_predicate The condition on which to join. + * @param compare_nulls Whether or not null values join to each other or not. + * @param output_size_data An optional pair of values indicating the exact output size and the + * number of matches for each row in the larger of the two input tables, left or right (may be + * precomputed using the corresponding mixed_left_semi_join_size API). + * @param mr Device memory resource used to allocate the returned table and columns' device memory + * + * @return A vector `left_indices` that can be used to construct + * the result of performing a mixed left semi join between the four input tables.
+ */ +std::unique_ptr> mixed_left_semi_join( + table_view const& left_equality, + table_view const& right_equality, + table_view const& left_conditional, + table_view const& right_conditional, + ast::expression const& binary_predicate, + null_equality compare_nulls = null_equality::EQUAL, + std::optional>> output_size_data = {}, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Returns an index vector corresponding to all rows in the left tables + * for which there is no row in the right tables where the columns of the + * equality table are equal and the predicate evaluates to true on the + * conditional tables. + * + * If the provided predicate returns NULL for a pair of rows (left, right), the + * left row is not included in the output. It is the user's responsibility to + * choose a suitable compare_nulls value AND use appropriate null-safe + * operators in the expression. + * + * If the provided output size or per-row counts are incorrect, behavior is undefined. + * + * @code{.pseudo} + * left_equality: {{0, 1, 2}} + * right_equality: {{1, 2, 3}} + * left_conditional: {{4, 4, 4}} + * right_conditional: {{3, 4, 5}} + * Expression: Left.Column_0 > Right.Column_0 + * Result: {0, 2} + * @endcode + * + * @throw cudf::logic_error If the binary predicate outputs a non-boolean result. + * @throw cudf::logic_error If the number of rows in left_equality and left_conditional do not + * match. + * @throw cudf::logic_error If the number of rows in right_equality and right_conditional do not + * match. + * + * @param left_equality The left table used for the equality join. + * @param right_equality The right table used for the equality join. + * @param left_conditional The left table used for the conditional join. + * @param right_conditional The right table used for the conditional join. + * @param binary_predicate The condition on which to join. + * @param compare_nulls Whether or not null values join to each other or not.
+ * @param output_size_data An optional pair of values indicating the exact output size and the + * number of matches for each row in the larger of the two input tables, left or right (may be + * precomputed using the corresponding mixed_full_join_size API). + * @param mr Device memory resource used to allocate the returned table and columns' device memory + * + * @return A pair of vectors [`left_indices`, `right_indices`] that can be used to construct + * the result of performing a mixed full join between the four input tables. + */ +std::unique_ptr> mixed_left_anti_join( + table_view const& left_equality, + table_view const& right_equality, + table_view const& left_conditional, + table_view const& right_conditional, + ast::expression const& binary_predicate, + null_equality compare_nulls = null_equality::EQUAL, + std::optional>> output_size_data = {}, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @brief Returns the exact number of matches (rows) when performing a * mixed inner join between the specified tables where the columns of the @@ -1123,6 +1228,90 @@ std::pair>> mixed_le null_equality compare_nulls = null_equality::EQUAL, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Returns the exact number of matches (rows) when performing a mixed + * left semi join between the specified tables where the columns of the + * equality table are equal and the predicate evaluates to true on the + * conditional tables. + * + * If the provided predicate returns NULL for a pair of rows (left, right), + * that pair is not included in the output. It is the user's responsiblity to + * choose a suitable compare_nulls value AND use appropriate null-safe + * operators in the expression. + * + * @throw cudf::logic_error If the binary predicate outputs a non-boolean result. + * @throw cudf::logic_error If the number of rows in left_equality and left_conditional do not + * match. 
+ * @throw cudf::logic_error If the number of rows in right_equality and right_conditional do not + * match. + * + * @param left_equality The left table used for the equality join. + * @param right_equality The right table used for the equality join. + * @param left_conditional The left table used for the conditional join. + * @param right_conditional The right table used for the conditional join. + * @param binary_predicate The condition on which to join. + * @param compare_nulls Whether or not null values join to each other or not. + * @param output_size An optional pair of values indicating the exact output size and the number of + * matches for each row in the larger of the two input tables, left or right (may be precomputed + * using the corresponding mixed_inner_join_size API). + * @param mr Device memory resource used to allocate the returned table and columns' device memory + * + * @return A pair containing the size that would result from performing the + * requested join and the number of matches for each row in one of the two + * tables. Which of the two tables is an implementation detail and should not + * be relied upon, simply passed to the corresponding `mixed_left_join` API as + * is. + */ +std::pair>> mixed_left_semi_join_size( + table_view const& left_equality, + table_view const& right_equality, + table_view const& left_conditional, + table_view const& right_conditional, + ast::expression const& binary_predicate, + null_equality compare_nulls = null_equality::EQUAL, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Returns the exact number of matches (rows) when performing a mixed + * left anti join between the specified tables. + * + * If the provided predicate returns NULL for a pair of rows (left, right), + * that pair is not included in the output. It is the user's responsiblity to + * choose a suitable compare_nulls value AND use appropriate null-safe + * operators in the expression. 
+ * + * @throw cudf::logic_error If the binary predicate outputs a non-boolean result. + * @throw cudf::logic_error If the number of rows in left_equality and left_conditional do not + * match. + * @throw cudf::logic_error If the number of rows in right_equality and right_conditional do not + * match. + * + * @param left_equality The left table used for the equality join. + * @param right_equality The right table used for the equality join. + * @param left_conditional The left table used for the conditional join. + * @param right_conditional The right table used for the conditional join. + * @param binary_predicate The condition on which to join. + * @param compare_nulls Whether or not null values join to each other or not. + * @param output_size An optional pair of values indicating the exact output size and the number of + * matches for each row in the larger of the two input tables, left or right (may be precomputed + * using the corresponding mixed_inner_join_size API). + * @param mr Device memory resource used to allocate the returned table and columns' device memory + * + * @return A pair containing the size that would result from performing the + * requested join and the number of matches for each row in one of the two + * tables. Which of the two tables is an implementation detail and should not + * be relied upon, simply passed to the corresponding `mixed_left_join` API as + * is. 
+ */ +std::pair>> mixed_left_anti_join_size( + table_view const& left_equality, + table_view const& right_equality, + table_view const& left_conditional, + table_view const& right_conditional, + ast::expression const& binary_predicate, + null_equality compare_nulls = null_equality::EQUAL, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @brief Returns the exact number of matches (rows) when performing a * conditional inner join between the specified tables where the predicate diff --git a/cpp/include/cudf/lists/detail/scatter_helper.cuh b/cpp/include/cudf/lists/detail/scatter_helper.cuh index bdf68037944..34747f4a2c7 100644 --- a/cpp/include/cudf/lists/detail/scatter_helper.cuh +++ b/cpp/include/cudf/lists/detail/scatter_helper.cuh @@ -91,17 +91,17 @@ struct unbound_list_view { /** * @brief Returns number of elements in this list row. */ - __device__ inline size_type size() const { return _size; } + [[nodiscard]] __device__ inline size_type size() const { return _size; } /** * @brief Returns whether this row came from the `scatter()` source or target */ - __device__ inline label_type label() const { return _label; } + [[nodiscard]] __device__ inline label_type label() const { return _label; } /** * @brief Returns the index in the source/target column */ - __device__ inline size_type row_index() const { return _row_index; } + [[nodiscard]] __device__ inline size_type row_index() const { return _row_index; } /** * @brief Binds to source/target column (depending on SOURCE/TARGET labels), @@ -111,7 +111,7 @@ struct unbound_list_view { * @param scatter_target Target column for the scatter operation * @return A (bound) list_view for the row that this object represents */ - __device__ inline list_device_view bind_to_column( + [[nodiscard]] __device__ inline list_device_view bind_to_column( lists_column_device_view const& scatter_source, lists_column_device_view const& scatter_target) const { diff --git 
a/cpp/include/cudf/lists/list_device_view.cuh b/cpp/include/cudf/lists/list_device_view.cuh index 5071f046e0c..e4803f98e68 100644 --- a/cpp/include/cudf/lists/list_device_view.cuh +++ b/cpp/include/cudf/lists/list_device_view.cuh @@ -69,7 +69,7 @@ class list_device_view { * The offset of this element as stored in the child column (i.e. 5) * may be fetched using this method. */ - __device__ inline size_type element_offset(size_type idx) const + [[nodiscard]] __device__ inline size_type element_offset(size_type idx) const { cudf_assert(idx >= 0 && idx < size() && "idx out of bounds"); return begin_offset + idx; @@ -91,7 +91,7 @@ class list_device_view { /** * @brief Checks whether element is null at specified index in the list row. */ - __device__ inline bool is_null(size_type idx) const + [[nodiscard]] __device__ inline bool is_null(size_type idx) const { cudf_assert(idx >= 0 && idx < size() && "Index out of bounds."); auto element_offset = begin_offset + idx; @@ -101,17 +101,20 @@ class list_device_view { /** * @brief Checks whether this list row is null. */ - __device__ inline bool is_null() const { return lists_column.is_null(_row_index); } + [[nodiscard]] __device__ inline bool is_null() const { return lists_column.is_null(_row_index); } /** * @brief Fetches the number of elements in this list row. */ - __device__ inline size_type size() const { return _size; } + [[nodiscard]] __device__ inline size_type size() const { return _size; } /** * @brief Fetches the lists_column_device_view that contains this list. */ - __device__ inline lists_column_device_view const& get_column() const { return lists_column; } + [[nodiscard]] __device__ inline lists_column_device_view const& get_column() const + { + return lists_column; + } template struct pair_accessor; @@ -141,7 +144,7 @@ class list_device_view { * 2. 
`p.second == false` */ template - __device__ inline const_pair_iterator pair_begin() const + [[nodiscard]] __device__ inline const_pair_iterator pair_begin() const { return const_pair_iterator{thrust::counting_iterator(0), pair_accessor{*this}}; } @@ -151,7 +154,7 @@ class list_device_view { * list_device_view. */ template - __device__ inline const_pair_iterator pair_end() const + [[nodiscard]] __device__ inline const_pair_iterator pair_end() const { return const_pair_iterator{thrust::counting_iterator(size()), pair_accessor{*this}}; @@ -173,7 +176,7 @@ class list_device_view { * 2. `p.second == false` */ template - __device__ inline const_pair_rep_iterator pair_rep_begin() const + [[nodiscard]] __device__ inline const_pair_rep_iterator pair_rep_begin() const { return const_pair_rep_iterator{thrust::counting_iterator(0), pair_rep_accessor{*this}}; @@ -184,7 +187,7 @@ class list_device_view { * list_device_view. */ template - __device__ inline const_pair_rep_iterator pair_rep_end() const + [[nodiscard]] __device__ inline const_pair_rep_iterator pair_rep_end() const { return const_pair_rep_iterator{thrust::counting_iterator(size()), pair_rep_accessor{*this}}; diff --git a/cpp/include/cudf/lists/lists_column_device_view.cuh b/cpp/include/cudf/lists/lists_column_device_view.cuh index aff088a7f44..e48707ec298 100644 --- a/cpp/include/cudf/lists/lists_column_device_view.cuh +++ b/cpp/include/cudf/lists/lists_column_device_view.cuh @@ -46,12 +46,12 @@ class lists_column_device_view { /** * @brief Fetches number of rows in the lists column */ - CUDF_HOST_DEVICE inline cudf::size_type size() const { return underlying.size(); } + [[nodiscard]] CUDF_HOST_DEVICE inline cudf::size_type size() const { return underlying.size(); } /** * @brief Fetches the offsets column of the underlying list column. 
*/ - __device__ inline column_device_view offsets() const + [[nodiscard]] __device__ inline column_device_view offsets() const { return underlying.child(lists_column_view::offsets_column_index); } @@ -59,7 +59,7 @@ class lists_column_device_view { /** * @brief Fetches the child column of the underlying list column. */ - __device__ inline column_device_view child() const + [[nodiscard]] __device__ inline column_device_view child() const { return underlying.child(lists_column_view::child_column_index); } @@ -67,19 +67,22 @@ class lists_column_device_view { /** * @brief Indicates whether the list column is nullable. */ - __device__ inline bool nullable() const { return underlying.nullable(); } + [[nodiscard]] __device__ inline bool nullable() const { return underlying.nullable(); } /** * @brief Indicates whether the row (i.e. list) at the specified * index is null. */ - __device__ inline bool is_null(size_type idx) const { return underlying.is_null(idx); } + [[nodiscard]] __device__ inline bool is_null(size_type idx) const + { + return underlying.is_null(idx); + } /** * @brief Fetches the offset of the underlying column_device_view, * in case it is a sliced/offset column. */ - __device__ inline size_type offset() const { return underlying.offset(); } + [[nodiscard]] __device__ inline size_type offset() const { return underlying.offset(); } private: column_device_view underlying; diff --git a/cpp/include/cudf/lists/lists_column_view.hpp b/cpp/include/cudf/lists/lists_column_view.hpp index b055a050bf8..d09bc2c935f 100644 --- a/cpp/include/cudf/lists/lists_column_view.hpp +++ b/cpp/include/cudf/lists/lists_column_view.hpp @@ -63,21 +63,21 @@ class lists_column_view : private column_view { /** * @brief Returns the parent column. 
*/ - column_view parent() const; + [[nodiscard]] column_view parent() const; /** * @brief Returns the internal column of offsets * * @throw cudf::logic error if this is an empty column */ - column_view offsets() const; + [[nodiscard]] column_view offsets() const; /** * @brief Returns the internal child column * * @throw cudf::logic error if this is an empty column */ - column_view child() const; + [[nodiscard]] column_view child() const; /** * @brief Returns the internal child column, applying any offset from the root. @@ -89,14 +89,14 @@ class lists_column_view : private column_view { * * @throw cudf::logic error if this is an empty column */ - column_view get_sliced_child(rmm::cuda_stream_view stream) const; + [[nodiscard]] column_view get_sliced_child(rmm::cuda_stream_view stream) const; /** * @brief Return first offset (accounting for column offset) * * @return int32_t const* Pointer to the first offset */ - offset_iterator offsets_begin() const noexcept + [[nodiscard]] offset_iterator offsets_begin() const noexcept { return offsets().begin() + offset(); } @@ -111,7 +111,10 @@ class lists_column_view : private column_view { * * @return int32_t const* Pointer to one past the last offset */ - offset_iterator offsets_end() const noexcept { return offsets_begin() + size() + 1; } + [[nodiscard]] offset_iterator offsets_end() const noexcept + { + return offsets_begin() + size() + 1; + } }; /** @} */ // end of group } // namespace cudf diff --git a/cpp/include/cudf/rolling/range_window_bounds.hpp b/cpp/include/cudf/rolling/range_window_bounds.hpp index a4f0a51eac7..4d31bb98f9c 100644 --- a/cpp/include/cudf/rolling/range_window_bounds.hpp +++ b/cpp/include/cudf/rolling/range_window_bounds.hpp @@ -56,12 +56,12 @@ struct range_window_bounds { * @return true If window is unbounded * @return false If window is of finite bounds */ - bool is_unbounded() const { return _is_unbounded; } + [[nodiscard]] bool is_unbounded() const { return _is_unbounded; } /** * @brief Returns 
the underlying scalar value for the bounds */ - scalar const& range_scalar() const { return *_range_scalar; } + [[nodiscard]] scalar const& range_scalar() const { return *_range_scalar; } range_window_bounds(range_window_bounds const&) = default; // Required to return (by copy) from functions. diff --git a/cpp/include/cudf/scalar/scalar.hpp b/cpp/include/cudf/scalar/scalar.hpp index dc2df368bae..0db729aec28 100644 --- a/cpp/include/cudf/scalar/scalar.hpp +++ b/cpp/include/cudf/scalar/scalar.hpp @@ -52,7 +52,7 @@ class scalar { /** * @brief Returns the scalar's logical value type. */ - data_type type() const noexcept; + [[nodiscard]] data_type type() const noexcept; /** * @brief Updates the validity of the value. @@ -72,7 +72,7 @@ class scalar { * @return true Value is valid. * @return false Value is invalid/null. */ - bool is_valid(rmm::cuda_stream_view stream = rmm::cuda_stream_default) const; + [[nodiscard]] bool is_valid(rmm::cuda_stream_view stream = rmm::cuda_stream_default) const; /** * @brief Returns a raw pointer to the validity bool in device memory. @@ -82,7 +82,7 @@ class scalar { /** * @brief Returns a const raw pointer to the validity bool in device memory. 
*/ - bool const* validity_data() const; + [[nodiscard]] bool const* validity_data() const; protected: data_type _type{type_id::EMPTY}; ///< Logical type of value in the scalar @@ -128,7 +128,7 @@ class fixed_width_scalar : public scalar { public: using value_type = T; - ~fixed_width_scalar() = default; + ~fixed_width_scalar() override = default; fixed_width_scalar(fixed_width_scalar&& other) = default; fixed_width_scalar& operator=(fixed_width_scalar const& other) = delete; @@ -278,7 +278,7 @@ class fixed_point_scalar : public scalar { using value_type = T; fixed_point_scalar() = delete; - ~fixed_point_scalar() = default; + ~fixed_point_scalar() override = default; fixed_point_scalar(fixed_point_scalar&& other) = default; fixed_point_scalar& operator=(fixed_point_scalar const& other) = delete; @@ -392,7 +392,7 @@ class string_scalar : public scalar { using value_type = cudf::string_view; string_scalar() = delete; - ~string_scalar() = default; + ~string_scalar() override = default; string_scalar(string_scalar&& other) = default; // string_scalar(string_scalar const& other) = delete; @@ -479,24 +479,25 @@ class string_scalar : public scalar { * * @param stream CUDA stream used for device memory operations. */ - std::string to_string(rmm::cuda_stream_view stream = rmm::cuda_stream_default) const; + [[nodiscard]] std::string to_string( + rmm::cuda_stream_view stream = rmm::cuda_stream_default) const; /** * @brief Get the value of the scalar as a string_view. * * @param stream CUDA stream used for device memory operations. */ - value_type value(rmm::cuda_stream_view stream = rmm::cuda_stream_default) const; + [[nodiscard]] value_type value(rmm::cuda_stream_view stream = rmm::cuda_stream_default) const; /** * @brief Returns the size of the string in bytes. */ - size_type size() const; + [[nodiscard]] size_type size() const; /** * @brief Returns a raw pointer to the string in device memory. 
*/ - const char* data() const; + [[nodiscard]] const char* data() const; protected: rmm::device_buffer _data{}; ///< device memory containing the string @@ -647,7 +648,7 @@ class duration_scalar : public chrono_scalar { class list_scalar : public scalar { public: list_scalar() = delete; - ~list_scalar() = default; + ~list_scalar() override = default; list_scalar(list_scalar&& other) = default; list_scalar& operator=(list_scalar const& other) = delete; @@ -695,7 +696,7 @@ class list_scalar : public scalar { /** * @brief Returns a non-owning, immutable view to underlying device data. */ - column_view view() const; + [[nodiscard]] column_view view() const; private: cudf::column _data; @@ -707,7 +708,7 @@ class list_scalar : public scalar { class struct_scalar : public scalar { public: struct_scalar() = delete; - ~struct_scalar() = default; + ~struct_scalar() override = default; struct_scalar(struct_scalar&& other) = default; struct_scalar& operator=(struct_scalar const& other) = delete; struct_scalar& operator=(struct_scalar&& other) = delete; @@ -765,7 +766,7 @@ class struct_scalar : public scalar { /** * @brief Returns a non-owning, immutable view to underlying device data. */ - table_view view() const; + [[nodiscard]] table_view view() const; private: table _data; diff --git a/cpp/include/cudf/scalar/scalar_device_view.cuh b/cpp/include/cudf/scalar/scalar_device_view.cuh index 56afa150dfc..ae658da9f9b 100644 --- a/cpp/include/cudf/scalar/scalar_device_view.cuh +++ b/cpp/include/cudf/scalar/scalar_device_view.cuh @@ -37,7 +37,7 @@ class scalar_device_view_base { /** * @brief Returns the value type */ - __host__ __device__ data_type type() const noexcept { return _type; } + [[nodiscard]] __host__ __device__ data_type type() const noexcept { return _type; } /** * @brief Returns whether the scalar holds a valid value (i.e., not null). 
@@ -45,7 +45,7 @@ class scalar_device_view_base { * @return true The element is valid * @return false The element is null */ - __device__ bool is_valid() const noexcept { return *_is_valid; } + [[nodiscard]] __device__ bool is_valid() const noexcept { return *_is_valid; } /** * @brief Updates the validity of the value @@ -260,17 +260,23 @@ class string_scalar_device_view : public detail::scalar_device_view_base { /** * @brief Returns string_view of the value of this scalar. */ - __device__ ValueType value() const noexcept { return ValueType{this->data(), _size}; } + [[nodiscard]] __device__ ValueType value() const noexcept + { + return ValueType{this->data(), _size}; + } /** * @brief Returns a raw pointer to the value in device memory */ - __device__ char const* data() const noexcept { return static_cast(_data); } + [[nodiscard]] __device__ char const* data() const noexcept + { + return static_cast(_data); + } /** * @brief Returns the size of the string in bytes. */ - __device__ size_type size() const noexcept { return _size; } + [[nodiscard]] __device__ size_type size() const noexcept { return _size; } private: const char* _data{}; ///< Pointer to device memory containing the value diff --git a/cpp/include/cudf/strings/json.hpp b/cpp/include/cudf/strings/json.hpp index 8435c47eaf5..f6645f2e029 100644 --- a/cpp/include/cudf/strings/json.hpp +++ b/cpp/include/cudf/strings/json.hpp @@ -48,7 +48,10 @@ class get_json_object_options { * @brief Returns true/false depending on whether single-quotes for representing strings * are allowed. 
*/ - CUDF_HOST_DEVICE inline bool get_allow_single_quotes() const { return allow_single_quotes; } + [[nodiscard]] CUDF_HOST_DEVICE inline bool get_allow_single_quotes() const + { + return allow_single_quotes; + } /** * @brief Returns true/false depending on whether individually returned string values have @@ -72,7 +75,7 @@ class get_json_object_options { * * @endcode */ - CUDF_HOST_DEVICE inline bool get_strip_quotes_from_single_strings() const + [[nodiscard]] CUDF_HOST_DEVICE inline bool get_strip_quotes_from_single_strings() const { return strip_quotes_from_single_strings; } diff --git a/cpp/include/cudf/strings/repeat_strings.hpp b/cpp/include/cudf/strings/repeat_strings.hpp index edba01b174f..f6bf12af967 100644 --- a/cpp/include/cudf/strings/repeat_strings.hpp +++ b/cpp/include/cudf/strings/repeat_strings.hpp @@ -18,6 +18,8 @@ #include #include +#include + namespace cudf { namespace strings { /** diff --git a/cpp/include/cudf/strings/replace_re.hpp b/cpp/include/cudf/strings/replace_re.hpp index a2c4eba1636..0e904958d15 100644 --- a/cpp/include/cudf/strings/replace_re.hpp +++ b/cpp/include/cudf/strings/replace_re.hpp @@ -20,6 +20,8 @@ #include #include +#include + namespace cudf { namespace strings { /** diff --git a/cpp/include/cudf/strings/string_view.cuh b/cpp/include/cudf/strings/string_view.cuh index 43a90997c86..24c8bfea2be 100644 --- a/cpp/include/cudf/strings/string_view.cuh +++ b/cpp/include/cudf/strings/string_view.cuh @@ -44,7 +44,7 @@ namespace detail { */ __device__ inline size_type characters_in_string(const char* str, size_type bytes) { - if ((str == 0) || (bytes == 0)) return 0; + if ((str == nullptr) || (bytes == 0)) return 0; auto ptr = reinterpret_cast(str); #ifndef CUDF_JIT_UDF return thrust::count_if( @@ -271,9 +271,9 @@ __device__ inline int string_view::compare(const string_view& in) const __device__ inline int string_view::compare(const char* data, size_type bytes) const { - size_type const len1 = size_bytes(); - const unsigned char* 
ptr1 = reinterpret_cast(this->data()); - const unsigned char* ptr2 = reinterpret_cast(data); + size_type const len1 = size_bytes(); + const auto* ptr1 = reinterpret_cast(this->data()); + const auto* ptr2 = reinterpret_cast(data); if ((ptr1 == ptr2) && (bytes == len1)) return 0; size_type idx = 0; for (; (idx < len1) && (idx < bytes); ++idx) { diff --git a/cpp/include/cudf/strings/string_view.hpp b/cpp/include/cudf/strings/string_view.hpp index 22409ab3dc7..f88f573ac0c 100644 --- a/cpp/include/cudf/strings/string_view.hpp +++ b/cpp/include/cudf/strings/string_view.hpp @@ -51,20 +51,20 @@ class string_view { /** * @brief Return the number of bytes in this string */ - CUDF_HOST_DEVICE inline size_type size_bytes() const { return _bytes; } + CUDF_HOST_DEVICE [[nodiscard]] inline size_type size_bytes() const { return _bytes; } /** * @brief Return the number of characters in this string */ - __device__ inline size_type length() const; + __device__ [[nodiscard]] inline size_type length() const; /** * @brief Return a pointer to the internal device array */ - CUDF_HOST_DEVICE inline const char* data() const { return _data; } + CUDF_HOST_DEVICE [[nodiscard]] inline const char* data() const { return _data; } /** * @brief Return true if string has no characters */ - CUDF_HOST_DEVICE inline bool empty() const { return size_bytes() == 0; } + CUDF_HOST_DEVICE [[nodiscard]] inline bool empty() const { return size_bytes() == 0; } /** * @brief Handy iterator for navigating through encoded characters. 
@@ -96,8 +96,8 @@ class string_view { __device__ inline bool operator>(const const_iterator&) const; __device__ inline bool operator>=(const const_iterator&) const; __device__ inline char_utf8 operator*() const; - __device__ inline size_type position() const; - __device__ inline size_type byte_offset() const; + [[nodiscard]] __device__ inline size_type position() const; + [[nodiscard]] __device__ inline size_type byte_offset() const; private: const char* p{}; @@ -109,11 +109,11 @@ class string_view { /** * @brief Return new iterator pointing to the beginning of this string */ - __device__ inline const_iterator begin() const; + __device__ [[nodiscard]] inline const_iterator begin() const; /** * @brief Return new iterator pointing past the end of this string */ - __device__ inline const_iterator end() const; + __device__ [[nodiscard]] inline const_iterator end() const; /** * @brief Return single UTF-8 character at the given character position @@ -126,7 +126,7 @@ class string_view { * * @param pos Character position */ - __device__ inline size_type byte_offset(size_type pos) const; + __device__ [[nodiscard]] inline size_type byte_offset(size_type pos) const; /** * @brief Comparing target string with this string. Each character is compared @@ -141,7 +141,7 @@ class string_view { * not match is greater in the arg string, or all compared characters * match but the arg string is longer. */ - __device__ inline int compare(const string_view& str) const; + __device__ [[nodiscard]] inline int compare(const string_view& str) const; /** * @brief Comparing target string with this string. Each character is compared * as a UTF-8 code-point value. @@ -193,9 +193,9 @@ class string_view { * Specify -1 to indicate to the end of the string. * @return -1 if str is not found in this string. 
*/ - __device__ inline size_type find(const string_view& str, - size_type pos = 0, - size_type count = -1) const; + __device__ [[nodiscard]] inline size_type find(const string_view& str, + size_type pos = 0, + size_type count = -1) const; /** * @brief Returns the character position of the first occurrence where the * array str is found in this string within the character range [pos,pos+n). @@ -221,9 +221,9 @@ class string_view { * Specify -1 to indicate to the end of the string. * @return -1 if arg string is not found in this string. */ - __device__ inline size_type find(char_utf8 character, - size_type pos = 0, - size_type count = -1) const; + __device__ [[nodiscard]] inline size_type find(char_utf8 character, + size_type pos = 0, + size_type count = -1) const; /** * @brief Returns the character position of the last occurrence where the * argument str is found in this string within the character range [pos,pos+n). @@ -234,9 +234,9 @@ class string_view { * Specify -1 to indicate to the end of the string. * @return -1 if arg string is not found in this string. */ - __device__ inline size_type rfind(const string_view& str, - size_type pos = 0, - size_type count = -1) const; + __device__ [[nodiscard]] inline size_type rfind(const string_view& str, + size_type pos = 0, + size_type count = -1) const; /** * @brief Returns the character position of the last occurrence where the * array str is found in this string within the character range [pos,pos+n). @@ -262,9 +262,9 @@ class string_view { * Specify -1 to indicate to the end of the string. * @return -1 if arg string is not found in this string. */ - __device__ inline size_type rfind(char_utf8 character, - size_type pos = 0, - size_type count = -1) const; + __device__ [[nodiscard]] inline size_type rfind(char_utf8 character, + size_type pos = 0, + size_type count = -1) const; /** * @brief Return a sub-string of this string. 
The original string and device @@ -274,7 +274,7 @@ class string_view { * @param length Number of characters from start to include in the sub-string. * @return New instance pointing to a subset of the characters within this instance. */ - __device__ inline string_view substr(size_type start, size_type length) const; + __device__ [[nodiscard]] inline string_view substr(size_type start, size_type length) const; /** * @brief Return minimum value associated with the string type @@ -300,7 +300,7 @@ class string_view { /** * @brief Default constructor represents an empty string. */ - CUDF_HOST_DEVICE inline string_view() : _data(""), _bytes(0), _length(0) {} + CUDF_HOST_DEVICE inline string_view() : _data("") {} /** * @brief Create instance from existing device char array. @@ -330,7 +330,7 @@ class string_view { * @param bytepos Byte position from start of _data. * @return The character position for the specified byte. */ - __device__ inline size_type character_offset(size_type bytepos) const; + __device__ [[nodiscard]] inline size_type character_offset(size_type bytepos) const; }; namespace strings { diff --git a/cpp/include/cudf/strings/strings_column_view.hpp b/cpp/include/cudf/strings/strings_column_view.hpp index fb3b9387a9b..aab898932de 100644 --- a/cpp/include/cudf/strings/strings_column_view.hpp +++ b/cpp/include/cudf/strings/strings_column_view.hpp @@ -58,14 +58,14 @@ class strings_column_view : private column_view { /** * @brief Returns the parent column. */ - column_view parent() const; + [[nodiscard]] column_view parent() const; /** * @brief Returns the internal column of offsets * * @throw cudf::logic error if this is an empty column */ - column_view offsets() const; + [[nodiscard]] column_view offsets() const; /** * @brief Return an iterator for the offsets child column. @@ -74,7 +74,7 @@ class strings_column_view : private column_view { * * @return Iterator pointing to the first offset value. 
*/ - offset_iterator offsets_begin() const; + [[nodiscard]] offset_iterator offsets_begin() const; /** * @brief Return an end iterator for the offsets child column. @@ -83,14 +83,14 @@ class strings_column_view : private column_view { * * @return Iterator pointing 1 past the last offset value. */ - offset_iterator offsets_end() const; + [[nodiscard]] offset_iterator offsets_end() const; /** * @brief Returns the internal column of chars * * @throw cudf::logic error if this is an empty column */ - column_view chars() const; + [[nodiscard]] column_view chars() const; /** * @brief Returns the number of bytes in the chars child column. @@ -98,7 +98,7 @@ class strings_column_view : private column_view { * This accounts for empty columns but does not reflect a sliced parent column * view (i.e.: non-zero offset or reduced row count). */ - size_type chars_size() const noexcept; + [[nodiscard]] size_type chars_size() const noexcept; /** * @brief Return an iterator for the chars child column. @@ -111,7 +111,7 @@ class strings_column_view : private column_view { * * @return Iterator pointing to the first char byte. */ - chars_iterator chars_begin() const; + [[nodiscard]] chars_iterator chars_begin() const; /** * @brief Return an end iterator for the offsets child column. @@ -121,7 +121,7 @@ class strings_column_view : private column_view { * * @return Iterator pointing 1 past the last char byte. */ - chars_iterator chars_end() const; + [[nodiscard]] chars_iterator chars_end() const; }; //! Strings column APIs. 
diff --git a/cpp/include/cudf/structs/structs_column_view.hpp b/cpp/include/cudf/structs/structs_column_view.hpp index a25dce9c278..329c24cfe0a 100644 --- a/cpp/include/cudf/structs/structs_column_view.hpp +++ b/cpp/include/cudf/structs/structs_column_view.hpp @@ -60,7 +60,7 @@ class structs_column_view : public column_view { * * @throw cudf::logic error if this is an empty column */ - column_view get_sliced_child(int index) const; + [[nodiscard]] column_view get_sliced_child(int index) const; }; // class structs_column_view; /** @} */ // end of group } // namespace cudf diff --git a/cpp/include/cudf/table/table.hpp b/cpp/include/cudf/table/table.hpp index 3c4b4dda61e..4a3c31d08e9 100644 --- a/cpp/include/cudf/table/table.hpp +++ b/cpp/include/cudf/table/table.hpp @@ -71,18 +71,18 @@ class table { /** * @brief Returns the number of columns in the table */ - size_type num_columns() const noexcept { return _columns.size(); } + [[nodiscard]] size_type num_columns() const noexcept { return _columns.size(); } /** * @brief Returns the number of rows */ - size_type num_rows() const noexcept { return _num_rows; } + [[nodiscard]] size_type num_rows() const noexcept { return _num_rows; } /** * @brief Returns an immutable, non-owning `table_view` of the contents of *this `table`. 
*/ - table_view view() const; + [[nodiscard]] table_view view() const; /** * @brief Conversion operator to an immutable, non-owning `table_view` of the @@ -141,7 +141,7 @@ class table { * @return A table_view consisting of columns from the original table * specified by the elements of `column_indices` */ - table_view select(std::vector const& column_indices) const + [[nodiscard]] table_view select(std::vector const& column_indices) const { return select(column_indices.begin(), column_indices.end()); }; @@ -166,7 +166,7 @@ class table { * @param i Index of the desired column * @return A const reference to the desired column */ - column const& get_column(cudf::size_type i) const { return *(_columns.at(i)); } + [[nodiscard]] column const& get_column(cudf::size_type i) const { return *(_columns.at(i)); } private: std::vector> _columns{}; diff --git a/cpp/include/cudf/table/table_device_view.cuh b/cpp/include/cudf/table/table_device_view.cuh index 2404fe88a9c..ce61e8853b6 100644 --- a/cpp/include/cudf/table/table_device_view.cuh +++ b/cpp/include/cudf/table/table_device_view.cuh @@ -61,9 +61,9 @@ class table_device_view_base { return _columns[column_index]; } - __host__ __device__ size_type num_columns() const noexcept { return _num_columns; } + [[nodiscard]] __host__ __device__ size_type num_columns() const noexcept { return _num_columns; } - __host__ __device__ size_type num_rows() const noexcept { return _num_rows; } + [[nodiscard]] __host__ __device__ size_type num_rows() const noexcept { return _num_rows; } void destroy(); diff --git a/cpp/include/cudf/table/table_view.hpp b/cpp/include/cudf/table/table_view.hpp index 8abd7aed8e9..77b9e539506 100644 --- a/cpp/include/cudf/table/table_view.hpp +++ b/cpp/include/cudf/table/table_view.hpp @@ -87,7 +87,7 @@ class table_view_base { /** * @brief Returns an iterator to the first view in the `table`. 
*/ - const_iterator begin() const noexcept { return std::begin(_columns); } + [[nodiscard]] const_iterator begin() const noexcept { return std::begin(_columns); } /** * @brief Returns an iterator one past the last column view in the `table`. @@ -103,7 +103,7 @@ class table_view_base { * `end()` acts as a place holder. Attempting to dereference it results in * undefined behavior. */ - const_iterator end() const noexcept { return std::end(_columns); } + [[nodiscard]] const_iterator end() const noexcept { return std::end(_columns); } /** * @brief Returns a reference to the view of the specified column @@ -119,17 +119,17 @@ class table_view_base { /** * @brief Returns the number of columns */ - size_type num_columns() const noexcept { return _columns.size(); } + [[nodiscard]] size_type num_columns() const noexcept { return _columns.size(); } /** * @brief Returns the number of rows */ - size_type num_rows() const noexcept { return _num_rows; } + [[nodiscard]] size_type num_rows() const noexcept { return _num_rows; } /** * @brief Returns true if `num_columns()` returns zero, or false otherwise */ - size_type is_empty() const noexcept { return num_columns() == 0; } + [[nodiscard]] size_type is_empty() const noexcept { return num_columns() == 0; } table_view_base() = default; @@ -208,7 +208,7 @@ class table_view : public detail::table_view_base { * @return A table_view consisting of columns from the original table * specified by the elements of `column_indices` */ - table_view select(std::vector const& column_indices) const; + [[nodiscard]] table_view select(std::vector const& column_indices) const; }; /** @@ -227,7 +227,7 @@ class mutable_table_view : public detail::table_view_base { mutable_table_view() = default; - mutable_column_view& column(size_type column_index) const + [[nodiscard]] mutable_column_view& column(size_type column_index) const { return const_cast(table_view_base::column(column_index)); } diff --git a/cpp/include/cudf/tdigest/tdigest_column_view.cuh 
b/cpp/include/cudf/tdigest/tdigest_column_view.cuh index c7513452387..696657191ca 100644 --- a/cpp/include/cudf/tdigest/tdigest_column_view.cuh +++ b/cpp/include/cudf/tdigest/tdigest_column_view.cuh @@ -82,28 +82,28 @@ class tdigest_column_view : private column_view { /** * @brief Returns the parent column. */ - column_view parent() const; + [[nodiscard]] column_view parent() const; /** * @brief Returns the column of centroids */ - lists_column_view centroids() const; + [[nodiscard]] lists_column_view centroids() const; /** * @brief Returns the internal column of mean values */ - column_view means() const; + [[nodiscard]] column_view means() const; /** * @brief Returns the internal column of weight values */ - column_view weights() const; + [[nodiscard]] column_view weights() const; /** * @brief Returns an iterator that returns the size of each tdigest * in the column (each row is 1 digest) */ - auto size_begin() const + [[nodiscard]] auto size_begin() const { return cudf::detail::make_counting_transform_iterator( 0, tdigest_size{centroids().offsets_begin()}); @@ -113,13 +113,13 @@ class tdigest_column_view : private column_view { * @brief Returns the first min value for the column. Each row corresponds * to the minimum value for the accompanying digest. */ - double const* min_begin() const; + [[nodiscard]] double const* min_begin() const; /** * @brief Returns the first max value for the column. Each row corresponds * to the maximum value for the accompanying digest. 
*/ - double const* max_begin() const; + [[nodiscard]] double const* max_begin() const; }; } // namespace tdigest diff --git a/cpp/include/cudf/types.hpp b/cpp/include/cudf/types.hpp index 459a4182aa0..6222b2e680e 100644 --- a/cpp/include/cudf/types.hpp +++ b/cpp/include/cudf/types.hpp @@ -268,12 +268,12 @@ class data_type { /** * @brief Returns the type identifier */ - constexpr type_id id() const noexcept { return _id; } + [[nodiscard]] constexpr type_id id() const noexcept { return _id; } /** * @brief Returns the scale (for fixed_point types) */ - constexpr int32_t scale() const noexcept { return _fixed_point_scale; } + [[nodiscard]] constexpr int32_t scale() const noexcept { return _fixed_point_scale; } private: type_id _id{type_id::EMPTY}; diff --git a/cpp/include/cudf/utilities/span.hpp b/cpp/include/cudf/utilities/span.hpp index 766fe93b9d1..0ac41b2c4a1 100644 --- a/cpp/include/cudf/utilities/span.hpp +++ b/cpp/include/cudf/utilities/span.hpp @@ -54,7 +54,7 @@ class span_base { static constexpr std::size_t extent = Extent; - constexpr span_base() noexcept : _data(nullptr), _size(0) {} + constexpr span_base() noexcept {} constexpr span_base(pointer data, size_type size) : _data(data), _size(size) {} // constexpr span_base(pointer begin, pointer end) : _data(begin), _size(end - begin) {} constexpr span_base(span_base const& other) noexcept = default; @@ -71,9 +71,9 @@ class span_base { constexpr iterator end() const noexcept { return _data + _size; } constexpr pointer data() const noexcept { return _data; } - constexpr size_type size() const noexcept { return _size; } - constexpr size_type size_bytes() const noexcept { return sizeof(T) * _size; } - constexpr bool empty() const noexcept { return _size == 0; } + [[nodiscard]] constexpr size_type size() const noexcept { return _size; } + [[nodiscard]] constexpr size_type size_bytes() const noexcept { return sizeof(T) * _size; } + [[nodiscard]] constexpr bool empty() const noexcept { return _size == 0; } /** * 
@brief Obtains a subspan consisting of the first N elements of the sequence @@ -98,8 +98,8 @@ class span_base { } private: - pointer _data; - size_type _size; + pointer _data{nullptr}; + size_type _size{0}; }; } // namespace detail @@ -251,7 +251,7 @@ class base_2dspan { constexpr auto data() const noexcept { return _data; } constexpr auto size() const noexcept { return _size; } constexpr auto count() const noexcept { return size().first * size().second; } - constexpr bool is_empty() const noexcept { return count() == 0; } + [[nodiscard]] constexpr bool is_empty() const noexcept { return count() == 0; } static constexpr size_t flatten_index(size_t row, size_t column, size_type size) noexcept { @@ -263,8 +263,11 @@ class base_2dspan { return {this->data() + flatten_index(row, 0, this->size()), this->size().second}; } - constexpr RowType front() const { return (*this)[0]; } - constexpr RowType back() const { return (*this)[size().first - 1]; } + [[nodiscard]] constexpr RowType front() const { return (*this)[0]; } + [[nodiscard]] constexpr RowType back() const + { + return (*this)[size().first - 1]; + } constexpr base_2dspan subspan(size_t first_row, size_t num_rows) const noexcept { diff --git a/cpp/include/cudf_test/cudf_gtest.hpp b/cpp/include/cudf_test/cudf_gtest.hpp index 87e4c94070b..d078bf90a8a 100644 --- a/cpp/include/cudf_test/cudf_gtest.hpp +++ b/cpp/include/cudf_test/cudf_gtest.hpp @@ -79,7 +79,7 @@ using Templates0 = Templates<>; template struct TypeList { - typedef Types type; + using type = Types; }; template diff --git a/cpp/include/cudf_test/file_utilities.hpp b/cpp/include/cudf_test/file_utilities.hpp index 8e242e5a4f3..6c21d8dfad2 100644 --- a/cpp/include/cudf_test/file_utilities.hpp +++ b/cpp/include/cudf_test/file_utilities.hpp @@ -58,5 +58,5 @@ class temp_directory { * * @return string path of the temporary directory */ - const std::string& path() const { return _path; } + [[nodiscard]] const std::string& path() const { return _path; } }; diff 
--git a/cpp/include/nvtext/detail/load_hash_file.hpp b/cpp/include/nvtext/detail/load_hash_file.hpp index b105c5c280e..9f4640f1daf 100644 --- a/cpp/include/nvtext/detail/load_hash_file.hpp +++ b/cpp/include/nvtext/detail/load_hash_file.hpp @@ -21,8 +21,8 @@ #include -#include -#include +#include +#include namespace nvtext { namespace detail { diff --git a/cpp/include/nvtext/subword_tokenize.hpp b/cpp/include/nvtext/subword_tokenize.hpp index 2b09ec66203..43cc059eddd 100644 --- a/cpp/include/nvtext/subword_tokenize.hpp +++ b/cpp/include/nvtext/subword_tokenize.hpp @@ -19,8 +19,8 @@ #include #include -#include -#include +#include +#include namespace nvtext { diff --git a/cpp/libcudf_kafka/CMakeLists.txt b/cpp/libcudf_kafka/CMakeLists.txt index e6abba207d9..bdb7e8afcf9 100644 --- a/cpp/libcudf_kafka/CMakeLists.txt +++ b/cpp/libcudf_kafka/CMakeLists.txt @@ -22,7 +22,7 @@ include(rapids-find) project( CUDA_KAFKA - VERSION 22.02.00 + VERSION 22.04.00 LANGUAGES CXX ) diff --git a/cpp/scripts/run-clang-tidy.py b/cpp/scripts/run-clang-tidy.py new file mode 100644 index 00000000000..3a1a663e231 --- /dev/null +++ b/cpp/scripts/run-clang-tidy.py @@ -0,0 +1,254 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from __future__ import print_function +import re +import os +import subprocess +import argparse +import json +import multiprocessing as mp +import shutil + + +EXPECTED_VERSION = "11.1.0" +VERSION_REGEX = re.compile(r" LLVM version ([0-9.]+)") +GPU_ARCH_REGEX = re.compile(r"sm_(\d+)") +SPACES = re.compile(r"\s+") +SEPARATOR = "-" * 16 + + +def parse_args(): + argparser = argparse.ArgumentParser("Runs clang-tidy on a project") + argparser.add_argument("-cdb", type=str, + # TODO This is a hack, needs to be fixed + default="cpp/build/cuda-11.5.0/clang-tidy/release/compile_commands.clangd.json", + help="Path to cmake-generated compilation database" + " file. It is always found inside the root of the " + "cmake build folder. So make sure that `cmake` has " + "been run once before running this script!") + argparser.add_argument("-exe", type=str, default="clang-tidy", + help="Path to clang-tidy exe") + argparser.add_argument("-ignore", type=str, default="[.]cu$|examples/kmeans/", + help="Regex used to ignore files from checking") + argparser.add_argument("-select", type=str, default=None, + help="Regex used to select files for checking") + argparser.add_argument("-j", type=int, default=-1, + help="Number of parallel jobs to launch.") + args = argparser.parse_args() + if args.j <= 0: + args.j = mp.cpu_count() + args.ignore_compiled = re.compile(args.ignore) if args.ignore else None + args.select_compiled = re.compile(args.select) if args.select else None + ret = subprocess.check_output("%s --version" % args.exe, shell=True) + ret = ret.decode("utf-8") + version = VERSION_REGEX.search(ret) + if version is None: + raise Exception("Failed to figure out clang-tidy version!") + version = version.group(1) + if version != EXPECTED_VERSION: + raise Exception("clang-tidy exe must be v%s found '%s'" % \ + (EXPECTED_VERSION, version)) + if not os.path.exists(args.cdb): + raise Exception("Compilation database '%s' missing" % args.cdb) + return args + + +def get_all_commands(cdb): 
+ with open(cdb, "r") as fp: + return json.load(fp) + + +def get_gpu_archs(command): + archs = [] + for loc in range(len(command)): + if command[loc] != "-gencode": + continue + arch_flag = command[loc + 1] + match = GPU_ARCH_REGEX.search(arch_flag) + if match is not None: + archs.append("--cuda-gpu-arch=sm_%s" % match.group(1)) + return archs + + +def get_index(arr, item): + try: + return arr.index(item) + except: + return -1 + + +def remove_item(arr, item): + loc = get_index(arr, item) + if loc >= 0: + del arr[loc] + return loc + + +def remove_item_plus_one(arr, item): + loc = get_index(arr, item) + if loc >= 0: + del arr[loc + 1] + del arr[loc] + return loc + + +def get_clang_includes(exe): + dir = os.getenv("CONDA_PREFIX") + if dir is None: + ret = subprocess.check_output("which %s 2>&1" % exe, shell=True) + ret = ret.decode("utf-8") + dir = os.path.dirname(os.path.dirname(ret)) + header = os.path.join(dir, "include", "ClangHeaders") + return ["-I", header] + + +def get_tidy_args(cmd, exe): + command, file = cmd["command"], cmd["file"] + is_cuda = file.endswith(".cu") + command = re.split(SPACES, command) + # compiler is always clang++! + command[0] = "clang++" + # remove compilation and output targets from the original command + remove_item_plus_one(command, "-c") + remove_item_plus_one(command, "-o") + if is_cuda: + # replace nvcc's "-gencode ..." with clang's "--cuda-gpu-arch ..." 
+ archs = get_gpu_archs(command) + command.extend(archs) + while True: + loc = remove_item_plus_one(command, "-gencode") + if loc < 0: + break + # "-x cuda" is the right usage in clang + loc = get_index(command, "-x") + if loc >= 0: + command[loc + 1] = "cuda" + remove_item_plus_one(command, "-ccbin") + remove_item(command, "--expt-extended-lambda") + remove_item(command, "--diag_suppress=unrecognized_gcc_pragma") + command.extend(get_clang_includes(exe)) + return command, is_cuda + + +def run_clang_tidy_command(tidy_cmd): + cmd = " ".join(tidy_cmd) + result = subprocess.run(cmd, check=False, shell=True, + stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + status = result.returncode == 0 + if status: + out = "" + else: + out = "CMD: " + cmd + out += result.stdout.decode("utf-8").rstrip() + return status, out + + +def run_clang_tidy(cmd, args): + command, is_cuda = get_tidy_args(cmd, args.exe) + tidy_cmd = [args.exe, + "-header-filter='.*cudf/cpp/(src|include|bench|comms).*'", + cmd["file"], "--", ] + tidy_cmd.extend(command) + status = True + out = "" + if is_cuda: + tidy_cmd.append("--cuda-device-only") + tidy_cmd.append(cmd["file"]) + ret, out1 = run_clang_tidy_command(tidy_cmd) + out += out1 + out += "%s" % SEPARATOR + if not ret: + status = ret + tidy_cmd[-2] = "--cuda-host-only" + ret, out1 = run_clang_tidy_command(tidy_cmd) + if not ret: + status = ret + out += out1 + else: + tidy_cmd.append(cmd["file"]) + ret, out1 = run_clang_tidy_command(tidy_cmd) + if not ret: + status = ret + out += out1 + return status, out, cmd["file"] + + +# yikes! 
global var :( +results = [] +def collect_result(result): + global results + results.append(result) + + +def print_result(passed, stdout, file): + status_str = "PASSED" if passed else "FAILED" + print("%s File:%s %s %s" % (SEPARATOR, file, status_str, SEPARATOR)) + if stdout: + print(stdout) + print("%s File:%s ENDS %s" % (SEPARATOR, file, SEPARATOR)) + + +def print_results(): + global results + status = True + for passed, stdout, file in results: + print_result(passed, stdout, file) + if not passed: + status = False + return status + + +def run_tidy_for_all_files(args, all_files): + pool = None if args.j == 1 else mp.Pool(args.j) + # actual tidy checker + for cmd in all_files: + # skip files that we don't want to look at + if args.ignore_compiled is not None and \ + re.search(args.ignore_compiled, cmd["file"]) is not None: + continue + if args.select_compiled is not None and \ + re.search(args.select_compiled, cmd["file"]) is None: + continue + if pool is not None: + pool.apply_async(run_clang_tidy, args=(cmd, args), + callback=collect_result) + else: + passed, stdout, file = run_clang_tidy(cmd, args) + collect_result((passed, stdout, file)) + if pool is not None: + pool.close() + pool.join() + return print_results() + + +def main(): + args = parse_args() + # Attempt to making sure that we run this script from root of repo always + if not os.path.exists(".git"): + raise Exception("This needs to always be run from the root of repo") + # Check whether clang-tidy exists + # print(args) + if "exe" not in args and shutil.which("clang-tidy") is not None: + print("clang-tidy not found. Exiting...") + return + all_files = get_all_commands(args.cdb) + status = run_tidy_for_all_files(args, all_files) + if not status: + raise Exception("clang-tidy failed! 
Refer to the errors above.") + + +if __name__ == "__main__": + main() diff --git a/cpp/src/binaryop/binaryop.cpp b/cpp/src/binaryop/binaryop.cpp index 7087b71a84e..5f9ff2574e3 100644 --- a/cpp/src/binaryop/binaryop.cpp +++ b/cpp/src/binaryop/binaryop.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Copyright 2018-2019 BlazingDB, Inc. * Copyright 2018 Christian Noboa Mardini @@ -74,7 +74,8 @@ rmm::device_buffer scalar_col_valid_mask_and(column_view const& col, inline bool is_null_dependent(binary_operator op) { return op == binary_operator::NULL_EQUALS || op == binary_operator::NULL_MIN || - op == binary_operator::NULL_MAX; + op == binary_operator::NULL_MAX || op == binary_operator::NULL_LOGICAL_AND || + op == binary_operator::NULL_LOGICAL_OR; } /** diff --git a/cpp/src/binaryop/compiled/NullLogicalAnd.cu b/cpp/src/binaryop/compiled/NullLogicalAnd.cu new file mode 100644 index 00000000000..48ae125bc93 --- /dev/null +++ b/cpp/src/binaryop/compiled/NullLogicalAnd.cu @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "binary_ops.cuh" + +namespace cudf::binops::compiled { +template void apply_binary_op(mutable_column_device_view&, + column_device_view const&, + column_device_view const&, + bool is_lhs_scalar, + bool is_rhs_scalar, + rmm::cuda_stream_view); +} // namespace cudf::binops::compiled diff --git a/cpp/src/binaryop/compiled/NullLogicalOr.cu b/cpp/src/binaryop/compiled/NullLogicalOr.cu new file mode 100644 index 00000000000..e0ea95ac3ee --- /dev/null +++ b/cpp/src/binaryop/compiled/NullLogicalOr.cu @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "binary_ops.cuh" + +namespace cudf::binops::compiled { +template void apply_binary_op(mutable_column_device_view&, + column_device_view const&, + column_device_view const&, + bool is_lhs_scalar, + bool is_rhs_scalar, + rmm::cuda_stream_view); +} // namespace cudf::binops::compiled diff --git a/cpp/src/binaryop/compiled/binary_ops.cu b/cpp/src/binaryop/compiled/binary_ops.cu index efa8cdca2cc..995c6702cf8 100644 --- a/cpp/src/binaryop/compiled/binary_ops.cu +++ b/cpp/src/binaryop/compiled/binary_ops.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -164,13 +164,13 @@ struct compare_functor { // This functor performs null aware binop between two columns or a column and a scalar by // iterating over them on the device struct null_considering_binop { - auto get_device_view(cudf::scalar const& scalar_item) const + [[nodiscard]] auto get_device_view(cudf::scalar const& scalar_item) const { return get_scalar_device_view( static_cast&>(const_cast(scalar_item))); } - auto get_device_view(column_device_view const& col_item) const { return col_item; } + [[nodiscard]] auto get_device_view(column_device_view const& col_item) const { return col_item; } template void populate_out_col(LhsViewT const& lhsv, @@ -339,6 +339,8 @@ case binary_operator::PMOD: apply_binary_op(out, lhs, case binary_operator::NULL_EQUALS: apply_binary_op(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break; case binary_operator::NULL_MAX: apply_binary_op(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break; case binary_operator::NULL_MIN: apply_binary_op(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break; +case binary_operator::NULL_LOGICAL_AND: apply_binary_op(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break; +case binary_operator::NULL_LOGICAL_OR: apply_binary_op(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break; default:; } // clang-format on diff --git a/cpp/src/binaryop/compiled/binary_ops.cuh b/cpp/src/binaryop/compiled/binary_ops.cuh index dc1cae82796..9b3e33f491e 100644 --- a/cpp/src/binaryop/compiled/binary_ops.cuh +++ b/cpp/src/binaryop/compiled/binary_ops.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -103,6 +103,8 @@ struct ops_wrapper { type_dispatcher(rhs.type(), type_casted_accessor{}, i, rhs, is_rhs_scalar); auto result = [&]() { if constexpr (std::is_same_v or + std::is_same_v or + std::is_same_v or std::is_same_v or std::is_same_v) { bool output_valid = false; @@ -150,6 +152,8 @@ struct ops2_wrapper { TypeRhs y = rhs.element(is_rhs_scalar ? 0 : i); auto result = [&]() { if constexpr (std::is_same_v or + std::is_same_v or + std::is_same_v or std::is_same_v or std::is_same_v) { bool output_valid = false; diff --git a/cpp/src/binaryop/compiled/operation.cuh b/cpp/src/binaryop/compiled/operation.cuh index 75507d055e0..4b5f78dc400 100644 --- a/cpp/src/binaryop/compiled/operation.cuh +++ b/cpp/src/binaryop/compiled/operation.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -179,8 +179,8 @@ struct PyMod { std::enable_if_t<(std::is_floating_point_v>)>* = nullptr> __device__ inline auto operator()(TypeLhs x, TypeRhs y) -> double { - double x1 = static_cast(x); - double y1 = static_cast(y); + auto x1 = static_cast(x); + auto y1 = static_cast(y); return fmod(fmod(x1, y1) + y1, y1); } @@ -415,6 +415,38 @@ struct NullMin { -> decltype(static_cast(static_cast(x) < static_cast(y) ? 
x : y)); }; +struct NullLogicalAnd { + template + __device__ inline auto operator()( + TypeLhs x, TypeRhs y, bool lhs_valid, bool rhs_valid, bool& output_valid) -> decltype(x && y) + { + bool lhs_false = lhs_valid && !x; + bool rhs_false = rhs_valid && !y; + bool both_valid = lhs_valid && rhs_valid; + output_valid = lhs_false || rhs_false || both_valid; + return both_valid && !lhs_false && !rhs_false; + } + // To allow std::is_invocable_v = true + template + __device__ inline auto operator()(TypeLhs x, TypeRhs y) -> decltype(x && y); +}; + +struct NullLogicalOr { + template + __device__ inline auto operator()( + TypeLhs x, TypeRhs y, bool lhs_valid, bool rhs_valid, bool& output_valid) -> decltype(x || y) + { + bool lhs_true = lhs_valid && x; + bool rhs_true = rhs_valid && y; + bool both_valid = lhs_valid && rhs_valid; + output_valid = lhs_true || rhs_true || both_valid; + return lhs_true || rhs_true; + } + // To allow std::is_invocable_v = true + template + __device__ inline auto operator()(TypeLhs x, TypeRhs y) -> decltype(x || y); +}; + } // namespace ops } // namespace compiled } // namespace binops diff --git a/cpp/src/binaryop/compiled/util.cpp b/cpp/src/binaryop/compiled/util.cpp index f89941a3d68..9481c236142 100644 --- a/cpp/src/binaryop/compiled/util.cpp +++ b/cpp/src/binaryop/compiled/util.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -64,22 +64,23 @@ template struct is_binary_operation_supported { // For types where Out type is fixed. (eg. 
comparison types) template - inline constexpr bool operator()(void) + inline constexpr bool operator()() { if constexpr (column_device_view::has_element_accessor() and column_device_view::has_element_accessor()) { if constexpr (has_common_type_v) { using common_t = std::common_type_t; return std::is_invocable_v; - } else + } else { return std::is_invocable_v; + } } else { return false; } } template - inline constexpr bool operator()(void) + inline constexpr bool operator()() { if constexpr (column_device_view::has_element_accessor() and column_device_view::has_element_accessor() and @@ -166,6 +167,10 @@ struct is_supported_operation_functor { case binary_operator::LESS_EQUAL: return bool_op(out); case binary_operator::GREATER_EQUAL: return bool_op(out); case binary_operator::NULL_EQUALS: return bool_op(out); + case binary_operator::NULL_LOGICAL_AND: + return bool_op(out); + case binary_operator::NULL_LOGICAL_OR: + return bool_op(out); default: return type_dispatcher(out, nested_support_functor{}, op); } return false; diff --git a/cpp/src/copying/concatenate.cu b/cpp/src/copying/concatenate.cu index 34c0cea683e..3412733f0b2 100644 --- a/cpp/src/copying/concatenate.cu +++ b/cpp/src/copying/concatenate.cu @@ -113,7 +113,7 @@ __global__ void concatenate_masks_kernel(column_device_view const* views, thrust::upper_bound( thrust::seq, output_offsets, output_offsets + number_of_views, mask_index) - output_offsets - 1; - bool bit_is_set = 1; + bool bit_is_set = true; if (source_view_index < number_of_views) { size_type const column_element_index = mask_index - output_offsets[source_view_index]; bit_is_set = views[source_view_index].is_valid(column_element_index); diff --git a/cpp/src/copying/contiguous_split.cu b/cpp/src/copying/contiguous_split.cu index f8c0006ed45..a74b97da5a1 100644 --- a/cpp/src/copying/contiguous_split.cu +++ b/cpp/src/copying/contiguous_split.cu @@ -1017,7 +1017,7 @@ std::vector contiguous_split(cudf::table_view const& input, rmm::device_buffer 
d_indices_and_source_info(indices_size + src_buf_info_size + offset_stack_size, stream, rmm::mr::get_current_device_resource()); - size_type* d_indices = reinterpret_cast(d_indices_and_source_info.data()); + auto* d_indices = reinterpret_cast(d_indices_and_source_info.data()); src_buf_info* d_src_buf_info = reinterpret_cast( reinterpret_cast(d_indices_and_source_info.data()) + indices_size); size_type* d_offset_stack = @@ -1198,8 +1198,8 @@ std::vector contiguous_split(cudf::table_view const& input, rmm::device_buffer d_src_and_dst_buffers(src_bufs_size + dst_bufs_size + offset_stack_size, stream, rmm::mr::get_current_device_resource()); - uint8_t const** d_src_bufs = reinterpret_cast(d_src_and_dst_buffers.data()); - uint8_t** d_dst_bufs = reinterpret_cast( + auto const** d_src_bufs = reinterpret_cast(d_src_and_dst_buffers.data()); + uint8_t** d_dst_bufs = reinterpret_cast( reinterpret_cast(d_src_and_dst_buffers.data()) + src_bufs_size); // setup src buffers diff --git a/cpp/src/groupby/sort/functors.hpp b/cpp/src/groupby/sort/functors.hpp index 05330a7c492..fa3d19bdcfd 100644 --- a/cpp/src/groupby/sort/functors.hpp +++ b/cpp/src/groupby/sort/functors.hpp @@ -55,7 +55,7 @@ struct store_result_functor { /** * @brief Check if the groupby keys are presorted */ - bool is_presorted() const { return keys_are_sorted == sorted::YES; } + [[nodiscard]] bool is_presorted() const { return keys_are_sorted == sorted::YES; } /** * @brief Get the grouped values diff --git a/cpp/src/groupby/sort/group_std.cu b/cpp/src/groupby/sort/group_std.cu index 9ebb516ee14..50e3b812b62 100644 --- a/cpp/src/groupby/sort/group_std.cu +++ b/cpp/src/groupby/sort/group_std.cu @@ -49,7 +49,7 @@ struct var_transform { { if (d_values.is_null(i)) return 0.0; - ResultType x = static_cast(values_iter[i]); + auto x = static_cast(values_iter[i]); size_type group_idx = d_group_labels[i]; size_type group_size = d_group_sizes[group_idx]; diff --git a/cpp/src/groupby/sort/group_tdigest.cu 
b/cpp/src/groupby/sort/group_tdigest.cu index b7b45341ad2..f48ab852f24 100644 --- a/cpp/src/groupby/sort/group_tdigest.cu +++ b/cpp/src/groupby/sort/group_tdigest.cu @@ -327,7 +327,7 @@ __global__ void generate_cluster_limits_kernel(int delta, // compute the first cluster limit double nearest_w; int nearest_w_index; // group-relative index into the input stream - while (1) { + while (true) { cur_weight = next_limit < 0 ? 0 : max(cur_weight + 1, nearest_w); if (cur_weight >= total_weight) { break; } diff --git a/cpp/src/hash/concurrent_unordered_map.cuh b/cpp/src/hash/concurrent_unordered_map.cuh index a3f954920c8..64ab69cd377 100644 --- a/cpp/src/hash/concurrent_unordered_map.cuh +++ b/cpp/src/hash/concurrent_unordered_map.cuh @@ -242,7 +242,7 @@ class concurrent_unordered_map { __host__ __device__ mapped_type get_unused_element() const { return m_unused_element; } - __host__ __device__ size_type capacity() const { return m_capacity; } + [[nodiscard]] __host__ __device__ size_type capacity() const { return m_capacity; } private: /** diff --git a/cpp/src/hash/concurrent_unordered_multimap.cuh b/cpp/src/hash/concurrent_unordered_multimap.cuh index 2b92c9142ca..cdf5b6a8649 100644 --- a/cpp/src/hash/concurrent_unordered_multimap.cuh +++ b/cpp/src/hash/concurrent_unordered_multimap.cuh @@ -503,7 +503,7 @@ class concurrent_unordered_multimap { if (count_collisions) m_collisions = 0; } - unsigned long long get_num_collisions() const { return m_collisions; } + [[nodiscard]] unsigned long long get_num_collisions() const { return m_collisions; } void print() { diff --git a/cpp/src/hash/hash_allocator.cuh b/cpp/src/hash/hash_allocator.cuh index 0c4acccf33d..db836917808 100644 --- a/cpp/src/hash/hash_allocator.cuh +++ b/cpp/src/hash/hash_allocator.cuh @@ -26,7 +26,7 @@ template struct managed_allocator { - typedef T value_type; + using value_type = T; rmm::mr::device_memory_resource* mr = new rmm::mr::managed_memory_resource; managed_allocator() = default; @@ -62,7 +62,7 @@ 
bool operator!=(const managed_allocator&, const managed_allocator&) template struct default_allocator { - typedef T value_type; + using value_type = T; rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(); default_allocator() = default; diff --git a/cpp/src/hash/managed.cuh b/cpp/src/hash/managed.cuh index c6cc60a6917..c5aab78589e 100644 --- a/cpp/src/hash/managed.cuh +++ b/cpp/src/hash/managed.cuh @@ -22,7 +22,7 @@ struct managed { static void* operator new(size_t n) { - void* ptr = 0; + void* ptr = nullptr; cudaError_t result = cudaMallocManaged(&ptr, n); if (cudaSuccess != result || 0 == ptr) throw std::bad_alloc(); return ptr; diff --git a/cpp/src/interop/dlpack.cpp b/cpp/src/interop/dlpack.cpp index 0e0ce8c4335..f368ae9fab5 100644 --- a/cpp/src/interop/dlpack.cpp +++ b/cpp/src/interop/dlpack.cpp @@ -168,7 +168,7 @@ std::unique_ptr from_dlpack(DLManagedTensor const* managed_tensor, data_type const dtype = DLDataType_to_data_type(tensor.dtype); size_t const byte_width = size_of(dtype); - size_t const num_rows = static_cast(tensor.shape[0]); + auto const num_rows = static_cast(tensor.shape[0]); size_t const bytes = num_rows * byte_width; // For 2D tensors, if the strides pointer is not null, then strides[1] is the diff --git a/cpp/src/io/avro/avro.cpp b/cpp/src/io/avro/avro.cpp index 9d3db35fea6..c1fa10d19b7 100644 --- a/cpp/src/io/avro/avro.cpp +++ b/cpp/src/io/avro/avro.cpp @@ -15,7 +15,8 @@ */ #include "avro.h" -#include + +#include #include namespace cudf { @@ -75,7 +76,7 @@ bool container::parse(file_metadata* md, size_t max_num_rows, size_t first_row) sig4 |= get_raw() << 24; if (sig4 != avro_magic) { return false; } for (;;) { - uint32_t num_md_items = static_cast(get_encoded()); + auto num_md_items = static_cast(get_encoded()); if (num_md_items == 0) { break; } for (uint32_t i = 0; i < num_md_items; i++) { auto const key = get_encoded(); @@ -103,8 +104,8 @@ bool container::parse(file_metadata* md, size_t max_num_rows, size_t 
first_row) auto const block_size = static_cast(get_encoded()); if (block_size <= 0 || object_count <= 0 || m_cur + block_size + 16 > m_end) { break; } if (object_count > first_row) { - uint32_t block_row = static_cast(total_object_count); - max_block_size = std::max(max_block_size, block_size); + auto block_row = static_cast(total_object_count); + max_block_size = std::max(max_block_size, block_size); total_object_count += object_count; if (!md->block_list.size()) { md->skip_rows = static_cast(first_row); diff --git a/cpp/src/io/avro/avro.h b/cpp/src/io/avro/avro.h index f84693fdba3..3dd989ffa79 100644 --- a/cpp/src/io/avro/avro.h +++ b/cpp/src/io/avro/avro.h @@ -19,11 +19,11 @@ #include "avro_common.h" #include +#include +#include +#include +#include #include -#include -#include -#include -#include #include #include @@ -85,7 +85,7 @@ class schema_parser { bool parse(std::vector& schema, const std::string& str); protected: - bool more_data() const { return (m_cur < m_end); } + [[nodiscard]] bool more_data() const { return (m_cur < m_end); } std::string get_str(); protected: @@ -103,7 +103,7 @@ class container { { } - auto bytecount() const { return m_cur - m_base; } + [[nodiscard]] auto bytecount() const { return m_cur - m_base; } template T get_raw() diff --git a/cpp/src/io/avro/avro_common.h b/cpp/src/io/avro/avro_common.h index 17f12da3165..1df6d176e95 100644 --- a/cpp/src/io/avro/avro_common.h +++ b/cpp/src/io/avro/avro_common.h @@ -17,8 +17,9 @@ #pragma once #include -#include -#include + +#include +#include namespace cudf { namespace io { diff --git a/cpp/src/io/avro/avro_gpu.cu b/cpp/src/io/avro/avro_gpu.cu index cb1c32458a3..7985d5df345 100644 --- a/cpp/src/io/avro/avro_gpu.cu +++ b/cpp/src/io/avro/avro_gpu.cu @@ -120,7 +120,7 @@ avro_decode_row(schemadesc_s const* schema, if (dataptr != nullptr && row < max_rows) { static_cast(dataptr)[row] = v; } } else { // string or enum size_t count = 0; - const char* ptr = 0; + const char* ptr = nullptr; if (kind == 
type_enum) { // dictionary size_t idx = schema[i].count + v; if (idx < global_dictionary.size()) { diff --git a/cpp/src/io/comp/brotli_dict.cpp b/cpp/src/io/comp/brotli_dict.cpp index 3e6939bb816..ef0fab51be6 100644 --- a/cpp/src/io/comp/brotli_dict.cpp +++ b/cpp/src/io/comp/brotli_dict.cpp @@ -49,7 +49,8 @@ THE SOFTWARE. */ #include "brotli_dict.h" -#include + +#include namespace cudf { namespace io { @@ -6528,7 +6529,7 @@ static const brotli_dictionary_s g_dictionary = { 136, 224, 164, 184, 224, 164, 149, 224, 165, 141, 224, 164, 176, 224, 164, 191, 224, 164, 175, 224, 164, 164, 224, 164, 190}}; -const brotli_dictionary_s* get_brotli_dictionary(void) { return &g_dictionary; } +const brotli_dictionary_s* get_brotli_dictionary() { return &g_dictionary; } } // namespace io } // namespace cudf diff --git a/cpp/src/io/comp/brotli_dict.h b/cpp/src/io/comp/brotli_dict.h index 4c1fec1492c..315fbd9712b 100644 --- a/cpp/src/io/comp/brotli_dict.h +++ b/cpp/src/io/comp/brotli_dict.h @@ -79,7 +79,7 @@ struct brotli_dictionary_s { constexpr int brotli_min_dictionary_word_length = 4; constexpr int brotli_max_dictionary_word_length = 24; -const brotli_dictionary_s* get_brotli_dictionary(void); +const brotli_dictionary_s* get_brotli_dictionary(); } // namespace io } // namespace cudf diff --git a/cpp/src/io/comp/brotli_tables.h b/cpp/src/io/comp/brotli_tables.h index 6e869999329..72a9b40bf95 100644 --- a/cpp/src/io/comp/brotli_tables.h +++ b/cpp/src/io/comp/brotli_tables.h @@ -2149,14 +2149,14 @@ CONSTANT uint8_t kContextLookup[2048] = { 7, }; -typedef struct CmdLutElement { +using CmdLutElement = struct CmdLutElement { uint8_t insert_len_extra_bits; uint8_t copy_len_extra_bits; int8_t distance_code; uint8_t context; uint16_t insert_len_offset; uint16_t copy_len_offset; -} CmdLutElement; +}; CONSTANT CmdLutElement kCmdLut[brotli_num_command_symbols] = { {0x00, 0x00, 0, 0x00, 0x0000, 0x0002}, {0x00, 0x00, 0, 0x01, 0x0000, 0x0003}, diff --git a/cpp/src/io/comp/cpu_unbz2.cpp 
b/cpp/src/io/comp/cpu_unbz2.cpp index 7f37b62e9c2..113623a2e67 100644 --- a/cpp/src/io/comp/cpu_unbz2.cpp +++ b/cpp/src/io/comp/cpu_unbz2.cpp @@ -81,8 +81,9 @@ For more information on these sources, see the manual. #include "io_uncomp.h" #include "unbz2.h" -#include -#include + +#include +#include #include namespace cudf { @@ -111,15 +112,15 @@ namespace io { #define BZ_MAX_SELECTORS (2 + (900000 / BZ_G_SIZE)) -typedef struct { +using huff_s = struct { int32_t minLen; int32_t limit[BZ_MAX_CODE_LEN]; int32_t base[BZ_MAX_CODE_LEN]; uint16_t perm[BZ_MAX_ALPHA_SIZE]; -} huff_s; +}; // Decoder state -typedef struct { +using unbz_state_s = struct { // Input const uint8_t* cur; const uint8_t* end; @@ -153,7 +154,7 @@ typedef struct { uint8_t len[BZ_MAX_ALPHA_SIZE]; huff_s ht[BZ_N_GROUPS]; -} unbz_state_s; +}; // return next 32 bits static inline uint32_t next32bits(const unbz_state_s* s) @@ -530,7 +531,8 @@ int32_t cpu_bz2_uncompress( int ret; size_t last_valid_block_in, last_valid_block_out; - if (dest == NULL || destLen == NULL || source == NULL || sourceLen < 12) return BZ_PARAM_ERROR; + if (dest == nullptr || destLen == nullptr || source == nullptr || sourceLen < 12) + return BZ_PARAM_ERROR; s.currBlockNo = 0; s.cur = source; diff --git a/cpp/src/io/comp/debrotli.cu b/cpp/src/io/comp/debrotli.cu index 8229245276b..b4a42a66133 100644 --- a/cpp/src/io/comp/debrotli.cu +++ b/cpp/src/io/comp/debrotli.cu @@ -201,8 +201,8 @@ inline __device__ uint32_t Log2Floor(uint32_t value) { return 32 - __clz(value); /// @brief initializes the bit reader __device__ void initbits(debrotli_state_s* s, const uint8_t* base, size_t len, size_t pos = 0) { - const uint8_t* p = base + pos; - uint32_t prefix_bytes = (uint32_t)(((size_t)p) & 3); + const uint8_t* p = base + pos; + auto prefix_bytes = (uint32_t)(((size_t)p) & 3); p -= prefix_bytes; s->base = base; s->end = base + len; @@ -248,7 +248,7 @@ inline __device__ uint32_t getbits(debrotli_state_s* s, uint32_t n) inline __device__ uint32_t 
getbits_bytealign(debrotli_state_s* s) { - uint32_t n = (uint32_t)((-(int32_t)s->bitpos) & 7); + auto n = (uint32_t)((-(int32_t)s->bitpos) & 7); uint32_t bits = showbits(s, n); skipbits(s, n); return bits; @@ -315,7 +315,7 @@ static __device__ uint8_t* local_alloc(debrotli_state_s* s, uint32_t bytes) int heap_used = s->heap_used; auto const len = allocation_size(bytes); if (heap_used + len <= s->heap_limit) { - uint8_t* ptr = reinterpret_cast(&s->heap[heap_used]); + auto* ptr = reinterpret_cast(&s->heap[heap_used]); s->heap_used = (uint16_t)(heap_used + len); return ptr; } else { @@ -351,9 +351,9 @@ static __device__ uint8_t* ext_heap_alloc(uint32_t bytes, uint8_t* ext_heap_base, uint32_t ext_heap_size) { - uint32_t len = (bytes + 0xf) & ~0xf; - volatile uint32_t* heap_ptr = reinterpret_cast(ext_heap_base); - uint32_t first_free_block = ~0; + uint32_t len = (bytes + 0xf) & ~0xf; + volatile auto* heap_ptr = reinterpret_cast(ext_heap_base); + uint32_t first_free_block = ~0; for (;;) { uint32_t blk_next, blk_prev; first_free_block = atomicExch((unsigned int*)heap_ptr, first_free_block); @@ -421,10 +421,10 @@ static __device__ void ext_heap_free(void* ptr, uint8_t* ext_heap_base, uint32_t ext_heap_size) { - uint32_t len = (bytes + 0xf) & ~0xf; - volatile uint32_t* heap_ptr = (volatile uint32_t*)ext_heap_base; - uint32_t first_free_block = ~0; - uint32_t cur_blk = static_cast(static_cast(ptr) - ext_heap_base); + uint32_t len = (bytes + 0xf) & ~0xf; + volatile auto* heap_ptr = (volatile uint32_t*)ext_heap_base; + uint32_t first_free_block = ~0; + auto cur_blk = static_cast(static_cast(ptr) - ext_heap_base); for (;;) { first_free_block = atomicExch((unsigned int*)heap_ptr, first_free_block); if (first_free_block != ~0) { break; } @@ -1299,7 +1299,7 @@ static __device__ void InverseMoveToFrontTransform(debrotli_state_s* s, uint8_t* uint32_t i = 1; uint32_t upper_bound = s->mtf_upper_bound; uint32_t* mtf = &s->mtf[1]; // Make mtf[-1] addressable. 
- uint8_t* mtf_u8 = reinterpret_cast(mtf); + auto* mtf_u8 = reinterpret_cast(mtf); uint32_t pattern = 0x03020100; // Little-endian // Initialize list using 4 consequent values pattern. @@ -1419,12 +1419,12 @@ static __device__ debrotli_huff_tree_group_s* HuffmanTreeGroupInit(debrotli_stat uint32_t max_symbol, uint32_t ntrees) { - debrotli_huff_tree_group_s* group = reinterpret_cast(local_alloc( + auto* group = reinterpret_cast(local_alloc( s, sizeof(debrotli_huff_tree_group_s) + ntrees * sizeof(uint16_t*) - sizeof(uint16_t*))); - group->alphabet_size = (uint16_t)alphabet_size; - group->max_symbol = (uint16_t)max_symbol; - group->num_htrees = (uint16_t)ntrees; - group->htrees[0] = nullptr; + group->alphabet_size = (uint16_t)alphabet_size; + group->max_symbol = (uint16_t)max_symbol; + group->num_htrees = (uint16_t)ntrees; + group->htrees[0] = nullptr; return group; } @@ -1640,7 +1640,7 @@ static __device__ void ProcessCommands(debrotli_state_s* s, const brotli_diction const uint8_t *context_map_slice, *dist_context_map_slice; int dist_rb_idx; uint32_t blen_L, blen_I, blen_D; - uint8_t* const dict_scratch = reinterpret_cast( + auto* const dict_scratch = reinterpret_cast( &s->hs); // 24+13 bytes (max length of a dictionary word including prefix & suffix) int context_mode; @@ -1808,7 +1808,7 @@ static __device__ void ProcessCommands(debrotli_state_s* s, const brotli_diction pos = meta_block_len; copy_length = 0; } else { - int32_t offset = (int32_t)words->offsets_by_length[copy_length]; + auto offset = (int32_t)words->offsets_by_length[copy_length]; uint32_t shift = words->size_bits_by_length[copy_length]; uint32_t address = distance_code - max_distance - 1; int32_t word_idx = address & ((1 << shift) - 1); @@ -1927,8 +1927,8 @@ extern "C" __global__ void __launch_bounds__(block_size, 2) if (z >= count) { return; } // Thread0: initializes shared state and decode stream header if (!t) { - uint8_t const* src = static_cast(inputs[z].srcDevice); - size_t src_size = 
inputs[z].srcSize; + auto const* src = static_cast(inputs[z].srcDevice); + size_t src_size = inputs[z].srcSize; if (src && src_size >= 8) { s->error = 0; s->out = s->outbase = static_cast(inputs[z].dstDevice); @@ -2084,7 +2084,7 @@ cudaError_t __host__ gpu_debrotli(gpu_inflate_input_s* inputs, { uint32_t count32 = (count > 0) ? count : 0; uint32_t fb_heap_size; - uint8_t* scratch_u8 = static_cast(scratch); + auto* scratch_u8 = static_cast(scratch); dim3 dim_block(block_size, 1); dim3 dim_grid(count32, 1); // TODO: Check max grid dimensions vs max expected count diff --git a/cpp/src/io/comp/gpuinflate.cu b/cpp/src/io/comp/gpuinflate.cu index dab8ce1afa5..508e960430d 100644 --- a/cpp/src/io/comp/gpuinflate.cu +++ b/cpp/src/io/comp/gpuinflate.cu @@ -926,8 +926,8 @@ __device__ void copy_stored(inflate_state_s* s, int t) __syncthreads(); if (t == 0) { // Reset bitstream to end of block - uint8_t* p = cur + len; - uint32_t prefix_bytes = (uint32_t)(((size_t)p) & 3); + uint8_t* p = cur + len; + auto prefix_bytes = (uint32_t)(((size_t)p) & 3); p -= prefix_bytes; s->cur = p; s->bitbuf.x = (p < s->end) ? *reinterpret_cast(p) : 0; @@ -952,7 +952,7 @@ __device__ void prefetch_warp(volatile inflate_state_s* s, int t) const uint8_t* cur_p = s->pref.cur_p; const uint8_t* end = s->end; while (shuffle((t == 0) ? s->pref.run : 0)) { - int32_t cur_lo = (int32_t)(size_t)cur_p; + auto cur_lo = (int32_t)(size_t)cur_p; int do_pref = shuffle((t == 0) ? 
(cur_lo - *(volatile int32_t*)&s->cur < prefetch_size - 32 * 4 - 4) : 0); if (do_pref) { @@ -1035,7 +1035,7 @@ __global__ void __launch_bounds__(block_size) inflate_state_s* state = &state_g; if (!t) { - uint8_t* p = const_cast(static_cast(inputs[z].srcDevice)); + auto* p = const_cast(static_cast(inputs[z].srcDevice)); size_t src_size = inputs[z].srcSize; uint32_t prefix_bytes; // Parse header if needed @@ -1181,8 +1181,8 @@ __global__ void __launch_bounds__(1024) copy_uncompressed_kernel(gpu_inflate_inp src_align_bytes = (uint32_t)(3 & reinterpret_cast(src)); src_align_bits = src_align_bytes << 3; while (len >= 32) { - const uint32_t* src32 = reinterpret_cast(src - src_align_bytes); - uint32_t copy_cnt = min(len >> 2, 1024); + const auto* src32 = reinterpret_cast(src - src_align_bytes); + uint32_t copy_cnt = min(len >> 2, 1024); if (t < copy_cnt) { uint32_t v = src32[t]; if (src_align_bits != 0) { v = __funnelshift_r(v, src32[t + 1], src_align_bits); } diff --git a/cpp/src/io/comp/gpuinflate.h b/cpp/src/io/comp/gpuinflate.h index 3ca9c9eee10..29856bcd3f3 100644 --- a/cpp/src/io/comp/gpuinflate.h +++ b/cpp/src/io/comp/gpuinflate.h @@ -16,7 +16,7 @@ #pragma once -#include +#include #include diff --git a/cpp/src/io/comp/snap.cu b/cpp/src/io/comp/snap.cu index d55c06a7d96..9f0a610f8f7 100644 --- a/cpp/src/io/comp/snap.cu +++ b/cpp/src/io/comp/snap.cu @@ -55,9 +55,9 @@ static inline __device__ uint32_t snap_hash(uint32_t v) */ static inline __device__ uint32_t fetch4(const uint8_t* src) { - uint32_t src_align = 3 & reinterpret_cast(src); - const uint32_t* src32 = reinterpret_cast(src - src_align); - uint32_t v = src32[0]; + uint32_t src_align = 3 & reinterpret_cast(src); + const auto* src32 = reinterpret_cast(src - src_align); + uint32_t v = src32[0]; return (src_align) ? 
__funnelshift_r(v, src32[1], src_align * 8) : v; } @@ -268,15 +268,15 @@ __global__ void __launch_bounds__(128) const uint8_t* src; if (!t) { - const uint8_t* src = static_cast(inputs[blockIdx.x].srcDevice); - uint32_t src_len = static_cast(inputs[blockIdx.x].srcSize); - uint8_t* dst = static_cast(inputs[blockIdx.x].dstDevice); - uint32_t dst_len = static_cast(inputs[blockIdx.x].dstSize); - uint8_t* end = dst + dst_len; - s->src = src; - s->src_len = src_len; - s->dst_base = dst; - s->end = end; + const auto* src = static_cast(inputs[blockIdx.x].srcDevice); + auto src_len = static_cast(inputs[blockIdx.x].srcSize); + auto* dst = static_cast(inputs[blockIdx.x].dstDevice); + auto dst_len = static_cast(inputs[blockIdx.x].dstSize); + uint8_t* end = dst + dst_len; + s->src = src; + s->src_len = src_len; + s->dst_base = dst; + s->end = end; while (src_len > 0x7f) { if (dst < end) { dst[0] = src_len | 0x80; } dst++; diff --git a/cpp/src/io/comp/uncomp.cpp b/cpp/src/io/comp/uncomp.cpp index 94721fb9ce1..66d73074af0 100644 --- a/cpp/src/io/comp/uncomp.cpp +++ b/cpp/src/io/comp/uncomp.cpp @@ -22,7 +22,7 @@ #include -#include // memset +#include // memset #include // uncompress @@ -196,17 +196,16 @@ bool OpenZipArchive(zip_archive_s* dst, const uint8_t* raw, size_t len) for (ptrdiff_t i = len - sizeof(zip_eocd_s) - 2; i + sizeof(zip_eocd_s) + 2 + 0xffff >= len && i >= 0; i--) { - const zip_eocd_s* eocd = reinterpret_cast(raw + i); + const auto* eocd = reinterpret_cast(raw + i); if (eocd->sig == 0x06054b50 && eocd->disk_id == eocd->start_disk // multi-file archives not supported && eocd->num_entries == eocd->total_entries && eocd->cdir_size >= sizeof(zip_cdfh_s) * eocd->num_entries && eocd->cdir_offset < len && i + *reinterpret_cast(eocd + 1) <= static_cast(len)) { - const zip_cdfh_s* cdfh = reinterpret_cast(raw + eocd->cdir_offset); - dst->eocd = eocd; + const auto* cdfh = reinterpret_cast(raw + eocd->cdir_offset); + dst->eocd = eocd; if (i >= static_cast(sizeof(zip64_eocdl))) 
{ - const zip64_eocdl* eocdl = - reinterpret_cast(raw + i - sizeof(zip64_eocdl)); + const auto* eocdl = reinterpret_cast(raw + i - sizeof(zip64_eocdl)); if (eocdl->sig == 0x07064b50) { dst->eocdl = eocdl; } } // Start of central directory diff --git a/cpp/src/io/comp/unsnap.cu b/cpp/src/io/comp/unsnap.cu index bdd9ddaf1ea..791a16bc912 100644 --- a/cpp/src/io/comp/unsnap.cu +++ b/cpp/src/io/comp/unsnap.cu @@ -87,10 +87,10 @@ inline __device__ volatile uint8_t& byte_access(unsnap_state_s* s, uint32_t pos) */ __device__ void snappy_prefetch_bytestream(unsnap_state_s* s, int t) { - const uint8_t* base = s->base; - uint32_t end = (uint32_t)(s->end - base); - uint32_t align_bytes = (uint32_t)(0x20 - (0x1f & reinterpret_cast(base))); - int32_t pos = min(align_bytes, end); + const uint8_t* base = s->base; + auto end = (uint32_t)(s->end - base); + auto align_bytes = (uint32_t)(0x20 - (0x1f & reinterpret_cast(base))); + int32_t pos = min(align_bytes, end); int32_t blen; // Start by prefetching up to the next a 32B-aligned location if (t < pos) { s->q.buf[t] = base[t]; } @@ -278,7 +278,7 @@ inline __device__ uint32_t get_len5_mask(uint32_t v0, uint32_t v1) __device__ void snappy_decode_symbols(unsnap_state_s* s, uint32_t t) { uint32_t cur = 0; - uint32_t end = static_cast(s->end - s->base); + auto end = static_cast(s->end - s->base); uint32_t bytes_left = s->uncompressed_size; uint32_t dst_pos = 0; int32_t batch = 0; @@ -498,7 +498,7 @@ template __device__ void snappy_process_symbols(unsnap_state_s* s, int t, Storage& temp_storage) { const uint8_t* literal_base = s->base; - uint8_t* out = static_cast(s->in.dstDevice); + auto* out = static_cast(s->in.dstDevice); int batch = 0; do { @@ -610,7 +610,7 @@ __device__ void snappy_process_symbols(unsnap_state_s* s, int t, Storage& temp_s __syncwarp(); if (t == 0) { s->q.batch_len[batch] = 0; } batch = (batch + 1) & (batch_count - 1); - } while (1); + } while (true); } /** @@ -639,7 +639,7 @@ __global__ void 
__launch_bounds__(block_size) if (t < batch_count) { s->q.batch_len[t] = 0; } __syncthreads(); if (!t) { - const uint8_t* cur = static_cast(s->in.srcDevice); + const auto* cur = static_cast(s->in.srcDevice); const uint8_t* end = cur + s->in.srcSize; s->error = 0; if (log_cyclecount) { s->tstart = clock(); } diff --git a/cpp/src/io/csv/csv_gpu.h b/cpp/src/io/csv/csv_gpu.h index 9b83028fa92..ec45dea3072 100644 --- a/cpp/src/io/csv/csv_gpu.h +++ b/cpp/src/io/csv/csv_gpu.h @@ -48,8 +48,8 @@ constexpr uint32_t rowofs_block_bytes = rowofs_block_dim * 32; // 16KB/threadbl * Format: row_count * 4 + id, where `row_count` is the number of rows * in a character block, and `id` is the row parser state at the end of the block. */ -typedef uint32_t rowctx32_t; -typedef uint64_t rowctx64_t; +using rowctx32_t = uint32_t; +using rowctx64_t = uint64_t; /** * Packed row context format @@ -61,7 +61,7 @@ typedef uint64_t rowctx64_t; * always zero (EOF input state implies a zero row count) and therefore * stored as 64-bit. 
*/ -typedef uint64_t packed_rowctx_t; +using packed_rowctx_t = uint64_t; /** * @brief return a row context from a {count, id} pair @@ -116,7 +116,7 @@ inline __host__ __device__ rowctx32_t get_row_context(packed_rowctx_t packed_ctx inline __host__ __device__ rowctx64_t select_row_context(rowctx64_t sel_ctx, packed_rowctx_t packed_ctx) { - uint32_t ctxid = static_cast(sel_ctx & 3); + auto ctxid = static_cast(sel_ctx & 3); rowctx32_t ctx = get_row_context(packed_ctx, ctxid); return (sel_ctx & ~3) + ctx; } diff --git a/cpp/src/io/csv/writer_impl.cu b/cpp/src/io/csv/writer_impl.cu index 1b66df860a3..1517226952a 100644 --- a/cpp/src/io/csv/writer_impl.cu +++ b/cpp/src/io/csv/writer_impl.cu @@ -126,7 +126,7 @@ struct column_to_strings_fn { // fails to compile var-templs); // template - constexpr static bool is_not_handled(void) + constexpr static bool is_not_handled() { // Note: the case (not std::is_same_v) // is already covered by is_integral) diff --git a/cpp/src/io/orc/aggregate_orc_metadata.hpp b/cpp/src/io/orc/aggregate_orc_metadata.hpp index 01418fd3bd6..416beaebe5d 100644 --- a/cpp/src/io/orc/aggregate_orc_metadata.hpp +++ b/cpp/src/io/orc/aggregate_orc_metadata.hpp @@ -47,17 +47,17 @@ class aggregate_orc_metadata { /** * @brief Sums up the number of rows of each source */ - size_type calc_num_rows() const; + [[nodiscard]] size_type calc_num_rows() const; /** * @brief Number of columns in a ORC file. 
*/ - size_type calc_num_cols() const; + [[nodiscard]] size_type calc_num_cols() const; /** * @brief Sums up the number of stripes of each source */ - size_type calc_num_stripes() const; + [[nodiscard]] size_type calc_num_stripes() const; public: std::vector per_file_metadata; @@ -67,26 +67,29 @@ class aggregate_orc_metadata { aggregate_orc_metadata(std::vector> const& sources); - auto const& get_schema(int schema_idx) const { return per_file_metadata[0].ff.types[schema_idx]; } + [[nodiscard]] auto const& get_schema(int schema_idx) const + { + return per_file_metadata[0].ff.types[schema_idx]; + } auto get_col_type(int col_idx) const { return per_file_metadata[0].ff.types[col_idx]; } - auto get_num_rows() const { return num_rows; } + [[nodiscard]] auto get_num_rows() const { return num_rows; } auto get_num_cols() const { return per_file_metadata[0].get_num_columns(); } - auto get_num_stripes() const { return num_stripes; } + [[nodiscard]] auto get_num_stripes() const { return num_stripes; } - auto const& get_types() const { return per_file_metadata[0].ff.types; } + [[nodiscard]] auto const& get_types() const { return per_file_metadata[0].ff.types; } - int get_row_index_stride() const { return per_file_metadata[0].ff.rowIndexStride; } + [[nodiscard]] int get_row_index_stride() const { return per_file_metadata[0].ff.rowIndexStride; } - auto is_row_grp_idx_present() const { return row_grp_idx_present; } + [[nodiscard]] auto is_row_grp_idx_present() const { return row_grp_idx_present; } /** * @brief Returns the name of the given column from the given source. */ - std::string const& column_name(const int source_idx, const int column_id) const + [[nodiscard]] std::string const& column_name(const int source_idx, const int column_id) const { CUDF_EXPECTS(source_idx <= static_cast(per_file_metadata.size()), "Out of range source_idx provided"); @@ -98,7 +101,7 @@ class aggregate_orc_metadata { * * Full name includes ancestor columns' names. 
*/ - std::string const& column_path(const int source_idx, const int column_id) const + [[nodiscard]] std::string const& column_path(const int source_idx, const int column_id) const { CUDF_EXPECTS(source_idx <= static_cast(per_file_metadata.size()), "Out of range source_idx provided"); diff --git a/cpp/src/io/orc/orc.h b/cpp/src/io/orc/orc.h index 4fa3480c90a..311f18bf72e 100644 --- a/cpp/src/io/orc/orc.h +++ b/cpp/src/io/orc/orc.h @@ -25,10 +25,10 @@ #include #include +#include +#include #include #include -#include -#include #include #include @@ -87,7 +87,7 @@ struct Stream { // Returns index of the column in the table, if any // Stream of the 'column 0' does not have a corresponding column in the table - std::optional column_index() const noexcept + [[nodiscard]] std::optional column_index() const noexcept { return column_id.value_or(0) > 0 ? std::optional{*column_id - 1} : std::optional{}; @@ -540,14 +540,14 @@ class OrcDecompressor { public: OrcDecompressor(CompressionKind kind, uint32_t blockSize); const uint8_t* Decompress(const uint8_t* srcBytes, size_t srcLen, size_t* dstLen); - uint32_t GetLog2MaxCompressionRatio() const { return m_log2MaxRatio; } - uint32_t GetMaxUncompressedBlockSize(uint32_t block_len) const + [[nodiscard]] uint32_t GetLog2MaxCompressionRatio() const { return m_log2MaxRatio; } + [[nodiscard]] uint32_t GetMaxUncompressedBlockSize(uint32_t block_len) const { return (block_len < (m_blockSize >> m_log2MaxRatio)) ? 
block_len << m_log2MaxRatio : m_blockSize; } - CompressionKind GetKind() const { return m_kind; } - uint32_t GetBlockSize() const { return m_blockSize; } + [[nodiscard]] CompressionKind GetKind() const { return m_kind; } + [[nodiscard]] uint32_t GetBlockSize() const { return m_blockSize; } protected: CompressionKind const m_kind; @@ -603,16 +603,16 @@ class metadata { public: explicit metadata(datasource* const src); - size_t get_total_rows() const { return ff.numberOfRows; } - int get_num_stripes() const { return ff.stripes.size(); } - int get_num_columns() const { return ff.types.size(); } + [[nodiscard]] size_t get_total_rows() const { return ff.numberOfRows; } + [[nodiscard]] int get_num_stripes() const { return ff.stripes.size(); } + [[nodiscard]] int get_num_columns() const { return ff.types.size(); } /** * @brief Returns the name of the column with the given ID. * * Name might not be unique in the ORC file, since columns with different parents are allowed to * have the same names. */ - std::string const& column_name(size_type column_id) const + [[nodiscard]] std::string const& column_name(size_type column_id) const { CUDF_EXPECTS(column_id < get_num_columns(), "Out of range column id provided"); return column_names[column_id]; @@ -623,22 +623,25 @@ class metadata { * * Each column in the ORC file has a unique path. */ - std::string const& column_path(size_type column_id) const + [[nodiscard]] std::string const& column_path(size_type column_id) const { CUDF_EXPECTS(column_id < get_num_columns(), "Out of range column id provided"); return column_paths[column_id]; } - int get_row_index_stride() const { return ff.rowIndexStride; } + [[nodiscard]] int get_row_index_stride() const { return ff.rowIndexStride; } /** * @brief Returns the ID of the parent column of the given column. 
*/ - size_type parent_id(size_type column_id) const { return parents.at(column_id).value().id; } + [[nodiscard]] size_type parent_id(size_type column_id) const + { + return parents.at(column_id).value().id; + } /** * @brief Returns the index the given column has in its parent's children list. */ - size_type field_index(size_type column_id) const + [[nodiscard]] size_type field_index(size_type column_id) const { return parents.at(column_id).value().field_idx; } @@ -646,7 +649,7 @@ class metadata { /** * @brief Returns whether the given column has a parent. */ - size_type column_has_parent(size_type column_id) const + [[nodiscard]] size_type column_has_parent(size_type column_id) const { return parents.at(column_id).has_value(); } @@ -693,7 +696,7 @@ struct orc_column_device_view : public column_device_view { struct rowgroup_rows { size_type begin; size_type end; - constexpr auto size() const noexcept { return end - begin; } + [[nodiscard]] constexpr auto size() const noexcept { return end - begin; } }; } // namespace orc diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 21c52f9295b..817b9fd7b01 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -371,7 +371,7 @@ rmm::device_buffer reader::impl::decompress_stripe_data( size_t decomp_offset = 0; uint32_t max_uncomp_block_size = 0; uint32_t start_pos = 0; - uint32_t start_pos_uncomp = (uint32_t)num_compressed_blocks; + auto start_pos_uncomp = (uint32_t)num_compressed_blocks; for (size_t i = 0; i < compinfo.size(); ++i) { auto dst_base = static_cast(decomp_data.data()); compinfo[i].uncompressed_data = dst_base + decomp_offset; diff --git a/cpp/src/io/orc/stripe_data.cu b/cpp/src/io/orc/stripe_data.cu index 05bc25597c2..dc09b3e7dd8 100644 --- a/cpp/src/io/orc/stripe_data.cu +++ b/cpp/src/io/orc/stripe_data.cu @@ -1179,7 +1179,7 @@ __global__ void __launch_bounds__(block_size) row_in = s->chunk.start_row + s->top.nulls_desc_row - prev_parent_null_count; if 
(row_in + nrows > first_row && row_in < first_row + max_num_rows && - s->chunk.valid_map_base != NULL) { + s->chunk.valid_map_base != nullptr) { int64_t dst_row = row_in - first_row; int64_t dst_pos = max(dst_row, (int64_t)0); uint32_t startbit = -static_cast(min(dst_row, (int64_t)0)); @@ -1325,14 +1325,14 @@ static __device__ void DecodeRowPositions(orcdec_state_s* s, s->top.data.cur_row + s->top.data.nrows < s->top.data.end_row) { uint32_t nrows = min(s->top.data.end_row - (s->top.data.cur_row + s->top.data.nrows), min((row_decoder_buffer_size - s->u.rowdec.nz_count) * 2, blockDim.x)); - if (s->chunk.valid_map_base != NULL) { + if (s->chunk.valid_map_base != nullptr) { // We have a present stream uint32_t rmax = s->top.data.end_row - min((uint32_t)first_row, s->top.data.end_row); - uint32_t r = (uint32_t)(s->top.data.cur_row + s->top.data.nrows + t - first_row); + auto r = (uint32_t)(s->top.data.cur_row + s->top.data.nrows + t - first_row); uint32_t valid = (t < nrows && r < rmax) ? (((const uint8_t*)s->chunk.valid_map_base)[r >> 3] >> (r & 7)) & 1 : 0; - volatile uint16_t* row_ofs_plus1 = (volatile uint16_t*)&s->u.rowdec.row[s->u.rowdec.nz_count]; + volatile auto* row_ofs_plus1 = (volatile uint16_t*)&s->u.rowdec.row[s->u.rowdec.nz_count]; uint32_t nz_pos, row_plus1, nz_count = s->u.rowdec.nz_count, last_row; if (t < nrows) { row_ofs_plus1[t] = valid; } lengths_to_positions(row_ofs_plus1, nrows, t); diff --git a/cpp/src/io/orc/stripe_enc.cu b/cpp/src/io/orc/stripe_enc.cu index 660ec025d00..02ae191d55a 100644 --- a/cpp/src/io/orc/stripe_enc.cu +++ b/cpp/src/io/orc/stripe_enc.cu @@ -1040,7 +1040,7 @@ __global__ void __launch_bounds__(block_size) uint32_t string_idx = (t < numvals) ? 
dict_data[s->cur_row + t] : 0; if (cid == CI_DICTIONARY) { // Encoding string contents - const char* ptr = 0; + const char* ptr = nullptr; uint32_t count = 0; if (t < numvals) { auto string_val = string_column->element(string_idx); diff --git a/cpp/src/io/orc/stripe_init.cu b/cpp/src/io/orc/stripe_init.cu index b197751d925..276a1f49abf 100644 --- a/cpp/src/io/orc/stripe_init.cu +++ b/cpp/src/io/orc/stripe_init.cu @@ -428,7 +428,7 @@ extern "C" __global__ void __launch_bounds__(128, 8) uint32_t rowgroups_in_chunk = s->chunk.num_rowgroups; s->rowgroup_start = s->chunk.rowgroup_id; s->rowgroup_end = s->rowgroup_start + rowgroups_in_chunk; - s->is_compressed = (strm_info != NULL); + s->is_compressed = (strm_info != nullptr); } __syncthreads(); while (s->rowgroup_start < s->rowgroup_end) { @@ -480,7 +480,7 @@ __global__ void __launch_bounds__(block_size) device_2dspan rowgroup_bounds, device_2dspan set_counts) { - typedef cub::BlockReduce BlockReduce; + using BlockReduce = cub::BlockReduce; __shared__ typename BlockReduce::TempStorage temp_storage; auto const column_id = blockIdx.x; diff --git a/cpp/src/io/orc/timezone.cpp b/cpp/src/io/orc/timezone.cpp index 3a1e8bf898a..810dfe87320 100644 --- a/cpp/src/io/orc/timezone.cpp +++ b/cpp/src/io/orc/timezone.cpp @@ -70,8 +70,8 @@ struct timezone_file { std::vector ttype; std::vector posix_tz_string; - auto timecnt() const { return header.timecnt; } - auto typecnt() const { return header.typecnt; } + [[nodiscard]] auto timecnt() const { return header.timecnt; } + [[nodiscard]] auto typecnt() const { return header.typecnt; } // Based on https://tools.ietf.org/id/draft-murchison-tzdist-tzif-00.html static constexpr auto leap_second_rec_size(bool is_64bit) noexcept @@ -222,7 +222,7 @@ class posix_parser { /** * @brief Returns the next character in the input. 
*/ - char next_character() const { return *cur; } + [[nodiscard]] char next_character() const { return *cur; } private: typename Container::const_iterator cur; diff --git a/cpp/src/io/orc/timezone.cuh b/cpp/src/io/orc/timezone.cuh index b25dfd0a621..a14d94df540 100644 --- a/cpp/src/io/orc/timezone.cuh +++ b/cpp/src/io/orc/timezone.cuh @@ -24,7 +24,7 @@ #include #include -#include +#include #include #include @@ -121,7 +121,7 @@ class timezone_table { : gmt_offset{gmt_offset}, ttimes{std::move(ttimes)}, offsets{std::move(offsets)} { } - timezone_table_view view() const { return {gmt_offset, ttimes, offsets}; } + [[nodiscard]] timezone_table_view view() const { return {gmt_offset, ttimes, offsets}; } }; /** diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index b7264cb81ac..a917dbf93a5 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -181,7 +181,7 @@ class orc_column_view { auto is_string() const noexcept { return cudf_column.type().id() == type_id::STRING; } void set_dict_stride(size_t stride) noexcept { _dict_stride = stride; } - auto dict_stride() const noexcept { return _dict_stride; } + [[nodiscard]] auto dict_stride() const noexcept { return _dict_stride; } /** * @brief Function that associates an existing dictionary chunk allocation @@ -192,14 +192,14 @@ class orc_column_view { dict = host_dict; d_dict = dev_dict; } - auto host_dict_chunk(size_t rowgroup) const + [[nodiscard]] auto host_dict_chunk(size_t rowgroup) const { CUDF_EXPECTS(is_string(), "Dictionary chunks are only present in string columns."); return &dict[rowgroup * _dict_stride + _str_idx]; } - auto device_dict_chunk() const { return d_dict; } + [[nodiscard]] auto device_dict_chunk() const { return d_dict; } - auto const& decimal_offsets() const { return d_decimal_offsets; } + [[nodiscard]] auto const& decimal_offsets() const { return d_decimal_offsets; } void attach_decimal_offsets(uint32_t* sizes_ptr) { d_decimal_offsets = sizes_ptr; } 
/** @@ -211,39 +211,39 @@ class orc_column_view { stripe_dict = host_stripe_dict; d_stripe_dict = dev_stripe_dict; } - auto host_stripe_dict(size_t stripe) const + [[nodiscard]] auto host_stripe_dict(size_t stripe) const { CUDF_EXPECTS(is_string(), "Stripe dictionary is only present in string columns."); return &stripe_dict[stripe * _dict_stride + _str_idx]; } - auto device_stripe_dict() const noexcept { return d_stripe_dict; } + [[nodiscard]] auto device_stripe_dict() const noexcept { return d_stripe_dict; } // Index in the table - uint32_t index() const noexcept { return _index; } + [[nodiscard]] uint32_t index() const noexcept { return _index; } // Id in the ORC file - auto id() const noexcept { return _index + 1; } + [[nodiscard]] auto id() const noexcept { return _index + 1; } - auto is_child() const noexcept { return _is_child; } + [[nodiscard]] auto is_child() const noexcept { return _is_child; } auto parent_index() const noexcept { return _parent_index.value(); } auto child_begin() const noexcept { return children.cbegin(); } auto child_end() const noexcept { return children.cend(); } auto num_children() const noexcept { return children.size(); } - auto type_width() const noexcept { return _type_width; } + [[nodiscard]] auto type_width() const noexcept { return _type_width; } auto size() const noexcept { return cudf_column.size(); } auto null_count() const noexcept { return cudf_column.null_count(); } auto null_mask() const noexcept { return cudf_column.null_mask(); } - bool nullable() const noexcept { return null_mask() != nullptr; } + [[nodiscard]] bool nullable() const noexcept { return null_mask() != nullptr; } auto user_defined_nullable() const noexcept { return nullable_from_metadata; } - auto scale() const noexcept { return _scale; } - auto precision() const noexcept { return _precision; } + [[nodiscard]] auto scale() const noexcept { return _scale; } + [[nodiscard]] auto precision() const noexcept { return _precision; } void 
set_orc_encoding(ColumnEncodingKind e) noexcept { _encoding_kind = e; } - auto orc_kind() const noexcept { return _type_kind; } - auto orc_encoding() const noexcept { return _encoding_kind; } - std::string_view orc_name() const noexcept { return name; } + [[nodiscard]] auto orc_kind() const noexcept { return _type_kind; } + [[nodiscard]] auto orc_encoding() const noexcept { return _encoding_kind; } + [[nodiscard]] std::string_view orc_name() const noexcept { return name; } private: column_view cudf_column; @@ -1063,15 +1063,15 @@ void set_stat_desc_leaf_cols(device_span columns, } writer::impl::encoded_statistics writer::impl::gather_statistic_blobs( - bool are_statistics_enabled, + statistics_freq stats_freq, orc_table_view const& orc_table, file_segmentation const& segmentation) { - auto const num_rowgroup_blobs = segmentation.rowgroups.count(); - auto const num_stripe_blobs = segmentation.num_stripes() * orc_table.num_columns(); - auto const num_file_blobs = orc_table.num_columns(); - auto const num_stat_blobs = num_rowgroup_blobs + num_stripe_blobs + num_file_blobs; - + auto const num_rowgroup_blobs = segmentation.rowgroups.count(); + auto const num_stripe_blobs = segmentation.num_stripes() * orc_table.num_columns(); + auto const num_file_blobs = orc_table.num_columns(); + auto const num_stat_blobs = num_rowgroup_blobs + num_stripe_blobs + num_file_blobs; + auto const are_statistics_enabled = stats_freq != statistics_freq::STATISTICS_NONE; if (not are_statistics_enabled or num_stat_blobs == 0) { return {}; } hostdevice_vector stat_desc(orc_table.num_columns(), stream); @@ -1164,17 +1164,27 @@ writer::impl::encoded_statistics writer::impl::gather_statistic_blobs( hostdevice_vector blobs( stat_merge[num_stat_blobs - 1].start_chunk + stat_merge[num_stat_blobs - 1].num_chunks, stream); - gpu::orc_encode_statistics( - blobs.device_ptr(), stat_merge.device_ptr(), stat_chunks.data(), num_stat_blobs, stream); + // Skip rowgroup blobs when encoding, if chosen 
granularity is coarser than "ROW_GROUP". + auto const is_granularity_rowgroup = stats_freq == ORC_STATISTICS_ROW_GROUP; + auto const num_skip = is_granularity_rowgroup ? 0 : num_rowgroup_blobs; + gpu::orc_encode_statistics(blobs.device_ptr(), + stat_merge.device_ptr(num_skip), + stat_chunks.data() + num_skip, + num_stat_blobs - num_skip, + stream); stat_merge.device_to_host(stream); blobs.device_to_host(stream, true); - std::vector rowgroup_blobs(num_rowgroup_blobs); - for (size_t i = 0; i < num_rowgroup_blobs; i++) { - auto const stat_begin = blobs.host_ptr(rowgroup_stat_merge[i].start_chunk); - auto const stat_end = stat_begin + rowgroup_stat_merge[i].num_chunks; - rowgroup_blobs[i].assign(stat_begin, stat_end); - } + auto rowgroup_blobs = [&]() -> std::vector { + if (not is_granularity_rowgroup) { return {}; } + std::vector rowgroup_blobs(num_rowgroup_blobs); + for (size_t i = 0; i < num_rowgroup_blobs; i++) { + auto const stat_begin = blobs.host_ptr(rowgroup_stat_merge[i].start_chunk); + auto const stat_end = stat_begin + rowgroup_stat_merge[i].num_chunks; + rowgroup_blobs[i].assign(stat_begin, stat_end); + } + return rowgroup_blobs; + }(); std::vector stripe_blobs(num_stripe_blobs); for (size_t i = 0; i < num_stripe_blobs; i++) { @@ -1351,7 +1361,7 @@ writer::impl::impl(std::unique_ptr sink, max_stripe_size{options.get_stripe_size_bytes(), options.get_stripe_size_rows()}, row_index_stride{options.get_row_index_stride()}, compression_kind_(to_orc_compression(options.get_compression())), - enable_statistics_(options.is_enabled_statistics()), + stats_freq_(options.get_statistics_freq()), single_write_mode(mode == SingleWriteMode::YES), kv_meta(options.get_key_value_metadata()), out_sink_(std::move(sink)) @@ -1372,7 +1382,7 @@ writer::impl::impl(std::unique_ptr sink, max_stripe_size{options.get_stripe_size_bytes(), options.get_stripe_size_rows()}, row_index_stride{options.get_row_index_stride()}, compression_kind_(to_orc_compression(options.get_compression())), - 
enable_statistics_(options.is_enabled_statistics()), + stats_freq_(options.get_statistics_freq()), single_write_mode(mode == SingleWriteMode::YES), kv_meta(options.get_key_value_metadata()), out_sink_(std::move(sink)) @@ -1954,7 +1964,7 @@ void writer::impl::write(table_view const& table) ProtobufWriter pbw_(&buffer_); - auto const statistics = gather_statistic_blobs(enable_statistics_, orc_table, segmentation); + auto const statistics = gather_statistic_blobs(stats_freq_, orc_table, segmentation); // Write stripes std::vector> write_tasks; diff --git a/cpp/src/io/orc/writer_impl.hpp b/cpp/src/io/orc/writer_impl.hpp index 2738a77e50a..69bb6029ee0 100644 --- a/cpp/src/io/orc/writer_impl.hpp +++ b/cpp/src/io/orc/writer_impl.hpp @@ -62,14 +62,14 @@ struct orc_table_view { rmm::device_uvector d_string_column_indices; auto num_columns() const noexcept { return columns.size(); } - size_type num_rows() const noexcept; + [[nodiscard]] size_type num_rows() const noexcept; auto num_string_columns() const noexcept { return string_column_indices.size(); } auto& column(uint32_t idx) { return columns.at(idx); } - auto const& column(uint32_t idx) const { return columns.at(idx); } + [[nodiscard]] auto const& column(uint32_t idx) const { return columns.at(idx); } auto& string_column(uint32_t idx) { return columns.at(string_column_indices.at(idx)); } - auto const& string_column(uint32_t idx) const + [[nodiscard]] auto const& string_column(uint32_t idx) const { return columns.at(string_column_indices.at(idx)); } @@ -85,8 +85,8 @@ struct stripe_rowgroups { uint32_t first; // first rowgroup in the stripe uint32_t size; // number of rowgroups in the stripe stripe_rowgroups(uint32_t id, uint32_t first, uint32_t size) : id{id}, first{first}, size{size} {} - auto cbegin() const { return thrust::make_counting_iterator(first); } - auto cend() const { return thrust::make_counting_iterator(first + size); } + [[nodiscard]] auto cbegin() const { return thrust::make_counting_iterator(first); } + 
[[nodiscard]] auto cend() const { return thrust::make_counting_iterator(first + size); } }; /** @@ -123,10 +123,10 @@ class orc_streams { std::vector offsets; size_t non_rle_data_size = 0; size_t rle_data_size = 0; - auto data_size() const { return non_rle_data_size + rle_data_size; } + [[nodiscard]] auto data_size() const { return non_rle_data_size + rle_data_size; } }; - orc_stream_offsets compute_offsets(host_span columns, - size_t num_rowgroups) const; + [[nodiscard]] orc_stream_offsets compute_offsets(host_span columns, + size_t num_rowgroups) const; operator std::vector const &() const { return streams; } @@ -293,13 +293,13 @@ class writer::impl { /** * @brief Returns column statistics encoded in ORC protobuf format. * - * @param are_statistics_enabled True if statistics are to be included in the output file + * @param statistics_freq Frequency of statistics to be included in the output file * @param orc_table Table information to be written * @param columns List of columns * @param segmentation stripe and rowgroup ranges * @return The statistic blobs */ - encoded_statistics gather_statistic_blobs(bool are_statistics_enabled, + encoded_statistics gather_statistic_blobs(statistics_freq statistics_freq, orc_table_view const& orc_table, file_segmentation const& segmentation); @@ -365,8 +365,8 @@ class writer::impl { size_t compression_blocksize_ = DEFAULT_COMPRESSION_BLOCKSIZE; CompressionKind compression_kind_ = CompressionKind::NONE; - bool enable_dictionary_ = true; - bool enable_statistics_ = true; + bool enable_dictionary_ = true; + statistics_freq stats_freq_ = ORC_STATISTICS_ROW_GROUP; // Overall file metadata. 
Filled in during the process and written during write_chunked_end() cudf::io::orc::FileFooter ff; diff --git a/cpp/src/io/parquet/compact_protocol_writer.hpp b/cpp/src/io/parquet/compact_protocol_writer.hpp index 71452bd7809..53739a26beb 100644 --- a/cpp/src/io/parquet/compact_protocol_writer.hpp +++ b/cpp/src/io/parquet/compact_protocol_writer.hpp @@ -20,8 +20,8 @@ #include "parquet_common.hpp" #include -#include -#include +#include +#include #include #include diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index 751d6b62319..df4310fcd63 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -102,7 +102,7 @@ struct page_state_s { */ __device__ uint32_t device_str2hash32(const char* key, size_t len, uint32_t seed = 33) { - const uint8_t* p = reinterpret_cast(key); + const auto* p = reinterpret_cast(key); uint32_t h1 = seed, k1; const uint32_t c1 = 0xcc9e2d51; const uint32_t c2 = 0x1b873593; @@ -513,7 +513,7 @@ __device__ void gpuInitStringDescriptors(volatile page_state_s* s, int target_po */ inline __device__ void gpuOutputString(volatile page_state_s* s, int src_pos, void* dstv) { - const char* ptr = NULL; + const char* ptr = nullptr; size_t len = 0; if (s->dict_base) { @@ -522,10 +522,9 @@ inline __device__ void gpuOutputString(volatile page_state_s* s, int src_pos, vo sizeof(string_index_pair) : 0; if (dict_pos < (uint32_t)s->dict_size) { - const string_index_pair* src = - reinterpret_cast(s->dict_base + dict_pos); - ptr = src->first; - len = src->second; + const auto* src = reinterpret_cast(s->dict_base + dict_pos); + ptr = src->first; + len = src->second; } } else { // Plain encoding @@ -540,9 +539,9 @@ inline __device__ void gpuOutputString(volatile page_state_s* s, int src_pos, vo *static_cast(dstv) = device_str2hash32(ptr, len); } else { // Output string descriptor - string_index_pair* dst = static_cast(dstv); - dst->first = ptr; - dst->second = len; + auto* dst = static_cast(dstv); + dst->first 
= ptr; + dst->second = len; } } @@ -1016,7 +1015,7 @@ static __device__ bool setupLocalPageInfo(page_state_s* const s, cur += InitLevelSection(s, cur, end, level_type::DEFINITION); s->dict_bits = 0; - s->dict_base = 0; + s->dict_base = nullptr; s->dict_size = 0; switch (s->page.encoding) { case Encoding::PLAIN_DICTIONARY: @@ -1133,7 +1132,7 @@ static __device__ void store_validity(PageNestingInfo* pni, int bit_offset = pni->valid_map_offset % 32; // if we fit entirely in the output word if (bit_offset + value_count <= 32) { - uint32_t relevant_mask = static_cast((static_cast(1) << value_count) - 1); + auto relevant_mask = static_cast((static_cast(1) << value_count) - 1); if (relevant_mask == ~0) { pni->valid_map[word_offset] = valid_mask; diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu index ec6b24b3b4e..2074304251f 100644 --- a/cpp/src/io/parquet/page_enc.cu +++ b/cpp/src/io/parquet/page_enc.cu @@ -1068,7 +1068,7 @@ __global__ void __launch_bounds__(128, 8) } if (t == 0) { uint8_t* base = s->page.page_data + s->page.max_hdr_size; - uint32_t actual_data_size = static_cast(s->cur - base); + auto actual_data_size = static_cast(s->cur - base); uint32_t compressed_bfr_size = GetMaxCompressedBfrSize(actual_data_size); s->page.max_data_size = actual_data_size; s->comp_in.srcDevice = base; @@ -1244,7 +1244,7 @@ class header_encoder { *header_end = current_header_ptr; } - inline __device__ uint8_t* get_ptr(void) { return current_header_ptr; } + inline __device__ uint8_t* get_ptr() { return current_header_ptr; } inline __device__ void set_ptr(uint8_t* ptr) { current_header_ptr = ptr; } }; diff --git a/cpp/src/io/parquet/parquet.hpp b/cpp/src/io/parquet/parquet.hpp index 21610638843..b4fa9b4ae82 100644 --- a/cpp/src/io/parquet/parquet.hpp +++ b/cpp/src/io/parquet/parquet.hpp @@ -19,8 +19,8 @@ #include "parquet_common.hpp" #include -#include -#include +#include +#include #include #include #include @@ -65,11 +65,11 @@ struct MilliSeconds { }; 
struct MicroSeconds { }; -typedef struct TimeUnit_isset { - TimeUnit_isset() : MILLIS(false), MICROS(false) {} - bool MILLIS; - bool MICROS; -} TimeUnit_isset; +using TimeUnit_isset = struct TimeUnit_isset { + TimeUnit_isset() {} + bool MILLIS{false}; + bool MICROS{false}; +}; struct TimeUnit { TimeUnit_isset isset; @@ -97,35 +97,21 @@ struct BsonType { }; // thrift generated code simplified. -typedef struct LogicalType_isset { - LogicalType_isset() - : STRING(false), - MAP(false), - LIST(false), - ENUM(false), - DECIMAL(false), - DATE(false), - TIME(false), - TIMESTAMP(false), - INTEGER(false), - UNKNOWN(false), - JSON(false), - BSON(false) - { - } - bool STRING; - bool MAP; - bool LIST; - bool ENUM; - bool DECIMAL; - bool DATE; - bool TIME; - bool TIMESTAMP; - bool INTEGER; - bool UNKNOWN; - bool JSON; - bool BSON; -} LogicalType_isset; +using LogicalType_isset = struct LogicalType_isset { + LogicalType_isset() {} + bool STRING{false}; + bool MAP{false}; + bool LIST{false}; + bool ENUM{false}; + bool DECIMAL{false}; + bool DATE{false}; + bool TIME{false}; + bool TIMESTAMP{false}; + bool INTEGER{false}; + bool UNKNOWN{false}; + bool JSON{false}; + bool BSON{false}; +}; struct LogicalType { LogicalType_isset isset; @@ -197,16 +183,19 @@ struct SchemaElement { // required int32 num; // }; // } - bool is_stub() const { return repetition_type == REPEATED && num_children == 1; } + [[nodiscard]] bool is_stub() const { return repetition_type == REPEATED && num_children == 1; } // https://github.com/apache/parquet-cpp/blob/642da05/src/parquet/schema.h#L49-L50 // One-level LIST encoding: Only allows required lists with required cells: // repeated value_type name - bool is_one_level_list() const { return repetition_type == REPEATED and num_children == 0; } + [[nodiscard]] bool is_one_level_list() const + { + return repetition_type == REPEATED and num_children == 0; + } // in parquet terms, a group is a level of nesting in the schema. 
a group // can be a struct or a list - bool is_struct() const + [[nodiscard]] bool is_struct() const { return type == UNDEFINED_TYPE && // this assumption might be a little weak. @@ -369,7 +358,7 @@ class CompactProtocolReader { m_base = m_cur = base; m_end = base + len; } - ptrdiff_t bytecount() const noexcept { return m_cur - m_base; } + [[nodiscard]] ptrdiff_t bytecount() const noexcept { return m_cur - m_base; } unsigned int getb() noexcept { return (m_cur < m_end) ? *m_cur++ : 0; } void skip_bytes(size_t bytecnt) noexcept { diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index fc4afe951db..885f36aeca4 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -337,7 +337,7 @@ class aggregate_reader_metadata { /** * @brief Sums up the number of rows of each source */ - size_type calc_num_rows() const + [[nodiscard]] size_type calc_num_rows() const { return std::accumulate( per_file_metadata.begin(), per_file_metadata.end(), 0, [](auto& sum, auto& pfm) { @@ -348,7 +348,7 @@ class aggregate_reader_metadata { /** * @brief Sums up the number of row groups of each source */ - size_type calc_num_row_groups() const + [[nodiscard]] size_type calc_num_row_groups() const { return std::accumulate( per_file_metadata.begin(), per_file_metadata.end(), 0, [](auto& sum, auto& pfm) { @@ -381,16 +381,16 @@ class aggregate_reader_metadata { } } - auto const& get_row_group(size_type row_group_index, size_type src_idx) const + [[nodiscard]] auto const& get_row_group(size_type row_group_index, size_type src_idx) const { CUDF_EXPECTS(src_idx >= 0 && src_idx < static_cast(per_file_metadata.size()), "invalid source index"); return per_file_metadata[src_idx].row_groups[row_group_index]; } - auto const& get_column_metadata(size_type row_group_index, - size_type src_idx, - int schema_idx) const + [[nodiscard]] auto const& get_column_metadata(size_type row_group_index, + size_type src_idx, + int schema_idx) const { auto col 
= std::find_if( per_file_metadata[src_idx].row_groups[row_group_index].columns.begin(), @@ -401,13 +401,16 @@ class aggregate_reader_metadata { return col->meta_data; } - auto get_num_rows() const { return num_rows; } + [[nodiscard]] auto get_num_rows() const { return num_rows; } - auto get_num_row_groups() const { return num_row_groups; } + [[nodiscard]] auto get_num_row_groups() const { return num_row_groups; } - auto const& get_schema(int schema_idx) const { return per_file_metadata[0].schema[schema_idx]; } + [[nodiscard]] auto const& get_schema(int schema_idx) const + { + return per_file_metadata[0].schema[schema_idx]; + } - auto const& get_key_value_metadata() const { return agg_keyval_map; } + [[nodiscard]] auto const& get_key_value_metadata() const { return agg_keyval_map; } /** * @brief Gets the concrete nesting depth of output cudf columns @@ -416,7 +419,7 @@ class aggregate_reader_metadata { * * @return comma-separated index column names in quotes */ - inline int get_output_nesting_depth(int schema_index) const + [[nodiscard]] inline int get_output_nesting_depth(int schema_index) const { auto& pfm = per_file_metadata[0]; int depth = 0; @@ -441,7 +444,7 @@ class aggregate_reader_metadata { * * @return comma-separated index column names in quotes */ - std::string get_pandas_index() const + [[nodiscard]] std::string get_pandas_index() const { auto it = agg_keyval_map.find("pandas"); if (it != agg_keyval_map.end()) { @@ -472,7 +475,7 @@ class aggregate_reader_metadata { * * @param names List of column names to load, where index column name(s) will be added */ - std::vector get_pandas_index_names() const + [[nodiscard]] std::vector get_pandas_index_names() const { std::vector names; auto str = get_pandas_index(); @@ -511,9 +514,9 @@ class aggregate_reader_metadata { * * @return List of row group indexes and its starting row */ - auto select_row_groups(std::vector> const& row_groups, - size_type& row_start, - size_type& row_count) const + [[nodiscard]] auto 
select_row_groups(std::vector> const& row_groups, + size_type& row_start, + size_type& row_count) const { if (!row_groups.empty()) { std::vector selection; @@ -570,10 +573,10 @@ class aggregate_reader_metadata { * @return input column information, output column information, list of output column schema * indices */ - auto select_columns(std::vector const& use_names, - bool include_index, - bool strings_to_categorical, - type_id timestamp_type_id) const + [[nodiscard]] auto select_columns(std::vector const& use_names, + bool include_index, + bool strings_to_categorical, + type_id timestamp_type_id) const { auto find_schema_child = [&](SchemaElement const& schema_elem, std::string const& name) { auto const& col_schema_idx = std::find_if( diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index b302516ba39..a9306275b26 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -166,12 +166,12 @@ struct aggregate_writer_metadata { return global_rowgroup_base; } - bool schema_matches(std::vector const& schema) const + [[nodiscard]] bool schema_matches(std::vector const& schema) const { return this->schema == schema; } auto& file(size_t p) { return files[p]; } - size_t num_files() const { return files.size(); } + [[nodiscard]] size_t num_files() const { return files.size(); } private: int32_t version = 0; @@ -678,18 +678,18 @@ struct parquet_column_view { std::vector const& schema_tree, rmm::cuda_stream_view stream); - column_view leaf_column_view() const; - gpu::parquet_column_device_view get_device_view(rmm::cuda_stream_view stream) const; + [[nodiscard]] column_view leaf_column_view() const; + [[nodiscard]] gpu::parquet_column_device_view get_device_view(rmm::cuda_stream_view stream) const; - column_view cudf_column_view() const { return cudf_col; } - parquet::Type physical_type() const { return schema_node.type; } + [[nodiscard]] column_view cudf_column_view() const { return cudf_col; } + [[nodiscard]] 
parquet::Type physical_type() const { return schema_node.type; } std::vector const& get_path_in_schema() { return path_in_schema; } // LIST related member functions - uint8_t max_def_level() const noexcept { return _max_def_level; } - uint8_t max_rep_level() const noexcept { return _max_rep_level; } - bool is_list() const noexcept { return _is_list; } + [[nodiscard]] uint8_t max_def_level() const noexcept { return _max_def_level; } + [[nodiscard]] uint8_t max_rep_level() const noexcept { return _max_rep_level; } + [[nodiscard]] bool is_list() const noexcept { return _is_list; } private: // Schema related members diff --git a/cpp/src/io/statistics/statistics.cuh b/cpp/src/io/statistics/statistics.cuh index 755f3416b1d..15fe2544930 100644 --- a/cpp/src/io/statistics/statistics.cuh +++ b/cpp/src/io/statistics/statistics.cuh @@ -20,13 +20,15 @@ */ #pragma once -#include #include #include #include + #include +#include + namespace cudf { namespace io { diff --git a/cpp/src/io/statistics/typed_statistics_chunk.cuh b/cpp/src/io/statistics/typed_statistics_chunk.cuh index 0992a557491..8e35fcf3c44 100644 --- a/cpp/src/io/statistics/typed_statistics_chunk.cuh +++ b/cpp/src/io/statistics/typed_statistics_chunk.cuh @@ -92,24 +92,20 @@ struct typed_statistics_chunk { using E = typename detail::extrema_type::type; using A = typename detail::aggregation_type::type; - uint32_t non_nulls; //!< number of non-null values in chunk - uint32_t null_count; //!< number of null values in chunk + uint32_t non_nulls{0}; //!< number of non-null values in chunk + uint32_t null_count{0}; //!< number of null values in chunk E minimum_value; E maximum_value; A aggregate; - uint8_t has_minmax; //!< Nonzero if min_value and max_values are valid - uint8_t has_sum; //!< Nonzero if sum is valid + uint8_t has_minmax{false}; //!< Nonzero if min_value and max_values are valid + uint8_t has_sum{false}; //!< Nonzero if sum is valid __device__ typed_statistics_chunk() - : non_nulls(0), - null_count(0), - 
minimum_value(detail::minimum_identity()), + : minimum_value(detail::minimum_identity()), maximum_value(detail::maximum_identity()), - aggregate(0), - has_minmax(false), - has_sum(false) // Set to true when storing + aggregate(0) { } @@ -140,22 +136,17 @@ template struct typed_statistics_chunk { using E = typename detail::extrema_type::type; - uint32_t non_nulls; //!< number of non-null values in chunk - uint32_t null_count; //!< number of null values in chunk + uint32_t non_nulls{0}; //!< number of non-null values in chunk + uint32_t null_count{0}; //!< number of null values in chunk E minimum_value; E maximum_value; - uint8_t has_minmax; //!< Nonzero if min_value and max_values are valid - uint8_t has_sum; //!< Nonzero if sum is valid + uint8_t has_minmax{false}; //!< Nonzero if min_value and max_values are valid + uint8_t has_sum{false}; //!< Nonzero if sum is valid __device__ typed_statistics_chunk() - : non_nulls(0), - null_count(0), - minimum_value(detail::minimum_identity()), - maximum_value(detail::maximum_identity()), - has_minmax(false), - has_sum(false) // Set to true when storing + : minimum_value(detail::minimum_identity()), maximum_value(detail::maximum_identity()) { } diff --git a/cpp/src/io/utilities/block_utils.cuh b/cpp/src/io/utilities/block_utils.cuh index 2b4f69df10f..d73f0ebc9b7 100644 --- a/cpp/src/io/utilities/block_utils.cuh +++ b/cpp/src/io/utilities/block_utils.cuh @@ -15,7 +15,7 @@ */ #pragma once -#include +#include namespace cudf { namespace io { @@ -32,7 +32,7 @@ inline __device__ T shuffle_xor(T var, uint32_t delta) return __shfl_xor_sync(~0, var, delta); } -inline __device__ void syncwarp(void) { __syncwarp(); } +inline __device__ void syncwarp() { __syncwarp(); } inline __device__ uint32_t ballot(int pred) { return __ballot_sync(~0, pred); } @@ -126,18 +126,18 @@ inline __device__ double Int128ToDouble_rn(uint64_t lo, int64_t hi) inline __device__ uint32_t unaligned_load32(const uint8_t* p) { - uint32_t ofs = 3 & 
reinterpret_cast(p); - const uint32_t* p32 = reinterpret_cast(p - ofs); - uint32_t v = p32[0]; + uint32_t ofs = 3 & reinterpret_cast(p); + const auto* p32 = reinterpret_cast(p - ofs); + uint32_t v = p32[0]; return (ofs) ? __funnelshift_r(v, p32[1], ofs * 8) : v; } inline __device__ uint64_t unaligned_load64(const uint8_t* p) { - uint32_t ofs = 3 & reinterpret_cast(p); - const uint32_t* p32 = reinterpret_cast(p - ofs); - uint32_t v0 = p32[0]; - uint32_t v1 = p32[1]; + uint32_t ofs = 3 & reinterpret_cast(p); + const auto* p32 = reinterpret_cast(p - ofs); + uint32_t v0 = p32[0]; + uint32_t v1 = p32[1]; if (ofs) { v0 = __funnelshift_r(v0, v1, ofs * 8); v1 = __funnelshift_r(v1, p32[2], ofs * 8); @@ -148,8 +148,8 @@ inline __device__ uint64_t unaligned_load64(const uint8_t* p) template inline __device__ void memcpy_block(void* dstv, const void* srcv, uint32_t len, uint32_t t) { - uint8_t* dst = static_cast(dstv); - const uint8_t* src = static_cast(srcv); + auto* dst = static_cast(dstv); + const auto* src = static_cast(srcv); uint32_t dst_align_bytes, src_align_bytes, src_align_bits; // Align output to 32-bit dst_align_bytes = 3 & -reinterpret_cast(dst); @@ -166,8 +166,8 @@ inline __device__ void memcpy_block(void* dstv, const void* srcv, uint32_t len, src_align_bytes = (uint32_t)(3 & reinterpret_cast(src)); src_align_bits = src_align_bytes * 8; while (len >= 4) { - const uint32_t* src32 = reinterpret_cast(src - src_align_bytes); - uint32_t copy_cnt = min(len >> 2, nthreads); + const auto* src32 = reinterpret_cast(src - src_align_bytes); + uint32_t copy_cnt = min(len >> 2, nthreads); uint32_t v; if (t < copy_cnt) { v = src32[t]; diff --git a/cpp/src/io/utilities/data_sink.cpp b/cpp/src/io/utilities/data_sink.cpp index 642f3518edd..63d0103ddec 100644 --- a/cpp/src/io/utilities/data_sink.cpp +++ b/cpp/src/io/utilities/data_sink.cpp @@ -49,9 +49,9 @@ class file_sink : public data_sink { size_t bytes_written() override { return _bytes_written; } - bool supports_device_write() 
const override { return _cufile_out != nullptr; } + [[nodiscard]] bool supports_device_write() const override { return _cufile_out != nullptr; } - bool is_device_write_preferred(size_t size) const override + [[nodiscard]] bool is_device_write_preferred(size_t size) const override { return _cufile_out != nullptr && _cufile_out->is_cufile_io_preferred(size); } @@ -109,13 +109,13 @@ class host_buffer_sink : public data_sink { */ class void_sink : public data_sink { public: - explicit void_sink() : _bytes_written(0) {} + explicit void_sink() {} virtual ~void_sink() {} void host_write(void const* data, size_t size) override { _bytes_written += size; } - bool supports_device_write() const override { return true; } + [[nodiscard]] bool supports_device_write() const override { return true; } void device_write(void const* gpu_data, size_t size, rmm::cuda_stream_view stream) override { @@ -146,7 +146,10 @@ class user_sink_wrapper : public data_sink { void host_write(void const* data, size_t size) override { user_sink->host_write(data, size); } - bool supports_device_write() const override { return user_sink->supports_device_write(); } + [[nodiscard]] bool supports_device_write() const override + { + return user_sink->supports_device_write(); + } void device_write(void const* gpu_data, size_t size, rmm::cuda_stream_view stream) override { diff --git a/cpp/src/io/utilities/datasource.cpp b/cpp/src/io/utilities/datasource.cpp index 3de6f35cb0d..6f864ab509f 100644 --- a/cpp/src/io/utilities/datasource.cpp +++ b/cpp/src/io/utilities/datasource.cpp @@ -40,9 +40,9 @@ class file_source : public datasource { virtual ~file_source() = default; - bool supports_device_read() const override { return _cufile_in != nullptr; } + [[nodiscard]] bool supports_device_read() const override { return _cufile_in != nullptr; } - bool is_device_read_preferred(size_t size) const override + [[nodiscard]] bool is_device_read_preferred(size_t size) const override { return _cufile_in != nullptr && 
_cufile_in->is_cufile_io_preferred(size); } @@ -79,7 +79,7 @@ class file_source : public datasource { return _cufile_in->read_async(offset, read_size, dst, stream); } - size_t size() const override { return _file.size(); } + [[nodiscard]] size_t size() const override { return _file.size(); } protected: detail::file_wrapper _file; @@ -102,7 +102,7 @@ class memory_mapped_source : public file_source { if (_file.size() != 0) map(_file.desc(), offset, size); } - virtual ~memory_mapped_source() + ~memory_mapped_source() override { if (_map_addr != nullptr) { munmap(_map_addr, _map_size); } } @@ -210,7 +210,10 @@ class user_datasource_wrapper : public datasource { return source->host_read(offset, size); } - bool supports_device_read() const override { return source->supports_device_read(); } + [[nodiscard]] bool supports_device_read() const override + { + return source->supports_device_read(); + } size_t device_read(size_t offset, size_t size, @@ -227,7 +230,7 @@ class user_datasource_wrapper : public datasource { return source->device_read(offset, size, stream); } - size_t size() const override { return source->size(); } + [[nodiscard]] size_t size() const override { return source->size(); } private: datasource* const source; ///< A non-owning pointer to the user-implemented datasource diff --git a/cpp/src/io/utilities/file_io_utilities.hpp b/cpp/src/io/utilities/file_io_utilities.hpp index 7178418bbbf..fcee4e43a20 100644 --- a/cpp/src/io/utilities/file_io_utilities.hpp +++ b/cpp/src/io/utilities/file_io_utilities.hpp @@ -45,8 +45,8 @@ class file_wrapper { explicit file_wrapper(std::string const& filepath, int flags); explicit file_wrapper(std::string const& filepath, int flags, mode_t mode); ~file_wrapper(); - auto size() const { return _size; } - auto desc() const { return fd; } + [[nodiscard]] auto size() const { return _size; } + [[nodiscard]] auto desc() const { return fd; } }; /** @@ -184,7 +184,7 @@ struct cufile_registered_file { register_handle(); } - auto 
const& handle() const noexcept { return cf_handle; } + [[nodiscard]] auto const& handle() const noexcept { return cf_handle; } ~cufile_registered_file(); diff --git a/cpp/src/io/utilities/hostdevice_vector.hpp b/cpp/src/io/utilities/hostdevice_vector.hpp index cbf914b8da6..367bbfcbdfa 100644 --- a/cpp/src/io/utilities/hostdevice_vector.hpp +++ b/cpp/src/io/utilities/hostdevice_vector.hpp @@ -77,9 +77,9 @@ class hostdevice_vector { return false; } - size_t max_size() const noexcept { return max_elements; } - size_t size() const noexcept { return num_elements; } - size_t memory_size() const noexcept { return sizeof(T) * num_elements; } + [[nodiscard]] size_t max_size() const noexcept { return max_elements; } + [[nodiscard]] size_t size() const noexcept { return num_elements; } + [[nodiscard]] size_t memory_size() const noexcept { return sizeof(T) * num_elements; } T& operator[](size_t i) const { return h_data[i]; } T* host_ptr(size_t offset = 0) const { return h_data + offset; } diff --git a/cpp/src/io/utilities/parsing_utils.cuh b/cpp/src/io/utilities/parsing_utils.cuh index 6da3296055c..878b36191ac 100644 --- a/cpp/src/io/utilities/parsing_utils.cuh +++ b/cpp/src/io/utilities/parsing_utils.cuh @@ -68,7 +68,7 @@ struct parse_options { cudf::detail::optional_trie trie_na; bool multi_delimiter; - parse_options_view view() const + [[nodiscard]] parse_options_view view() const { return {delimiter, terminator, diff --git a/cpp/src/io/utilities/thread_pool.hpp b/cpp/src/io/utilities/thread_pool.hpp index c57082034db..952ab58813a 100644 --- a/cpp/src/io/utilities/thread_pool.hpp +++ b/cpp/src/io/utilities/thread_pool.hpp @@ -44,7 +44,7 @@ namespace detail { * and/or obtain its eventual return value. */ class thread_pool { - typedef std::uint_fast32_t ui32; + using ui32 = int; public: /** @@ -79,7 +79,7 @@ class thread_pool { * * @return The number of queued tasks. 
*/ - size_t get_tasks_queued() const + [[nodiscard]] size_t get_tasks_queued() const { const std::scoped_lock lock(queue_mutex); return tasks.size(); @@ -90,7 +90,7 @@ class thread_pool { * * @return The number of running tasks. */ - ui32 get_tasks_running() const { return tasks_total - (ui32)get_tasks_queued(); } + [[nodiscard]] ui32 get_tasks_running() const { return tasks_total - (ui32)get_tasks_queued(); } /** * @brief Get the total number of unfinished tasks - either still in the queue, or running in a @@ -98,14 +98,14 @@ class thread_pool { * * @return The total number of tasks. */ - ui32 get_tasks_total() const { return tasks_total; } + [[nodiscard]] ui32 get_tasks_total() const { return tasks_total; } /** * @brief Get the number of threads in the pool. * * @return The number of threads. */ - ui32 get_thread_count() const { return thread_count; } + [[nodiscard]] ui32 get_thread_count() const { return thread_count; } /** * @brief Parallelize a loop by splitting it into blocks, submitting each block separately to the diff --git a/cpp/src/io/utilities/trie.cuh b/cpp/src/io/utilities/trie.cuh index 1140a08b76b..85834ad2f0e 100644 --- a/cpp/src/io/utilities/trie.cuh +++ b/cpp/src/io/utilities/trie.cuh @@ -23,6 +23,8 @@ #include +#include + namespace cudf { namespace detail { static constexpr char trie_terminating_character = '\n'; diff --git a/cpp/src/join/conditional_join.cu b/cpp/src/join/conditional_join.cu index c3dc343dd2d..dc62eeec539 100644 --- a/cpp/src/join/conditional_join.cu +++ b/cpp/src/join/conditional_join.cu @@ -110,7 +110,6 @@ conditional_join(table_view const& left, } else { // Allocate storage for the counter used to get the size of the join output rmm::device_scalar size(0, stream, mr); - CHECK_CUDA(stream.value()); if (has_nulls) { compute_conditional_join_output_size <<>>( @@ -130,7 +129,6 @@ conditional_join(table_view const& left, swap_tables, size.data()); } - CHECK_CUDA(stream.value()); join_size = size.value(stream); } @@ -178,8 +176,6 
@@ conditional_join(table_view const& left, swap_tables); } - CHECK_CUDA(stream.value()); - auto join_indices = std::make_pair(std::move(left_indices), std::move(right_indices)); // For full joins, get the indices in the right table that were not joined to @@ -260,7 +256,6 @@ std::size_t compute_conditional_join_output_size(table_view const& left, // Allocate storage for the counter used to get the size of the join output rmm::device_scalar size(0, stream, mr); - CHECK_CUDA(stream.value()); // Determine number of output rows without actually building the output to simply // find what the size of the output will be. @@ -283,8 +278,6 @@ std::size_t compute_conditional_join_output_size(table_view const& left, swap_tables, size.data()); } - CHECK_CUDA(stream.value()); - return size.value(stream); } diff --git a/cpp/src/join/hash_join.cuh b/cpp/src/join/hash_join.cuh index 4005d6101bd..c2115c3caa4 100644 --- a/cpp/src/join/hash_join.cuh +++ b/cpp/src/join/hash_join.cuh @@ -242,13 +242,13 @@ struct hash_join::hash_join_impl { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) const; - std::size_t inner_join_size(cudf::table_view const& probe, - null_equality compare_nulls, - rmm::cuda_stream_view stream) const; + [[nodiscard]] std::size_t inner_join_size(cudf::table_view const& probe, + null_equality compare_nulls, + rmm::cuda_stream_view stream) const; - std::size_t left_join_size(cudf::table_view const& probe, - null_equality compare_nulls, - rmm::cuda_stream_view stream) const; + [[nodiscard]] std::size_t left_join_size(cudf::table_view const& probe, + null_equality compare_nulls, + rmm::cuda_stream_view stream) const; std::size_t full_join_size(cudf::table_view const& probe, null_equality compare_nulls, diff --git a/cpp/src/join/join_common_utils.hpp b/cpp/src/join/join_common_utils.hpp index 938a85247f8..526c22d1d5c 100644 --- a/cpp/src/join/join_common_utils.hpp +++ b/cpp/src/join/join_common_utils.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 
NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,6 +25,7 @@ #include +#include #include #include @@ -60,6 +61,9 @@ using mixed_multimap_type = cuco::static_multimap>; +using semi_map_type = cuco:: + static_map; + using row_hash = cudf::row_hasher; using row_equality = cudf::row_equality_comparator; diff --git a/cpp/src/join/mixed_join.cu b/cpp/src/join/mixed_join.cu index c609b58132c..0eb0a8de352 100644 --- a/cpp/src/join/mixed_join.cu +++ b/cpp/src/join/mixed_join.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,8 +33,6 @@ #include #include -#include - namespace cudf { namespace detail { @@ -57,6 +55,9 @@ mixed_join( CUDF_EXPECTS(right_conditional.num_rows() == right_equality.num_rows(), "The right conditional and equality tables must have the same number of rows."); + CUDF_EXPECTS((join_type != join_kind::LEFT_SEMI_JOIN) && (join_type != join_kind::LEFT_ANTI_JOIN), + "Left semi and anti joins should use mixed_join_semi."); + auto const right_num_rows{right_conditional.num_rows()}; auto const left_num_rows{left_conditional.num_rows()}; auto const swap_tables = (join_type == join_kind::INNER_JOIN) && (right_num_rows > left_num_rows); @@ -71,25 +72,21 @@ mixed_join( // null index for the right table; in others, we return an empty output. if (right_num_rows == 0) { switch (join_type) { - // Left, left anti, and full all return all the row indices from left - // with a corresponding NULL from the right. + // Left and full joins all return all the row indices from + // left with a corresponding NULL from the right. 
case join_kind::LEFT_JOIN: - case join_kind::LEFT_ANTI_JOIN: case join_kind::FULL_JOIN: return get_trivial_left_join_indices(left_conditional, stream); - // Inner and left semi joins return empty output because no matches can exist. + // Inner joins return empty output because no matches can exist. case join_kind::INNER_JOIN: - case join_kind::LEFT_SEMI_JOIN: return std::make_pair(std::make_unique>(0, stream, mr), std::make_unique>(0, stream, mr)); default: CUDF_FAIL("Invalid join kind."); break; } } else if (left_num_rows == 0) { switch (join_type) { - // Left, left anti, left semi, and inner joins all return empty sets. + // Left and inner joins all return empty sets. case join_kind::LEFT_JOIN: - case join_kind::LEFT_ANTI_JOIN: case join_kind::INNER_JOIN: - case join_kind::LEFT_SEMI_JOIN: return std::make_pair(std::make_unique>(0, stream, mr), std::make_unique>(0, stream, mr)); // Full joins need to return the trivial complement. @@ -160,7 +157,6 @@ mixed_join( } else { // Allocate storage for the counter used to get the size of the join output rmm::device_scalar size(0, stream, mr); - CHECK_CUDA(stream.value()); matches_per_row = rmm::device_uvector{static_cast(outer_num_rows), stream, mr}; @@ -199,7 +195,6 @@ mixed_join( size.data(), mutable_matches_per_row_span); } - CHECK_CUDA(stream.value()); join_size = size.value(stream); } @@ -229,7 +224,7 @@ mixed_join( auto const& join_output_r = right_indices->data(); if (has_nulls) { - mixed_join + mixed_join <<>>( *left_conditional_view, *right_conditional_view, @@ -244,7 +239,7 @@ mixed_join( join_result_offsets.data(), swap_tables); } else { - mixed_join + mixed_join <<>>( *left_conditional_view, *right_conditional_view, @@ -260,8 +255,6 @@ mixed_join( swap_tables); } - CHECK_CUDA(stream.value()); - auto join_indices = std::make_pair(std::move(left_indices), std::move(right_indices)); // For full joins, get the indices in the right table that were not joined to @@ -292,6 +285,10 @@ 
compute_mixed_join_output_size(table_view const& left_equality, CUDF_EXPECTS(join_type != join_kind::FULL_JOIN, "Size estimation is not available for full joins."); + CUDF_EXPECTS( + (join_type != join_kind::LEFT_SEMI_JOIN) && (join_type != join_kind::LEFT_ANTI_JOIN), + "Left semi and anti join size estimation should use compute_mixed_join_output_size_semi."); + CUDF_EXPECTS(left_conditional.num_rows() == left_equality.num_rows(), "The left conditional and equality tables must have the same number of rows."); CUDF_EXPECTS(right_conditional.num_rows() == right_equality.num_rows(), @@ -319,14 +316,12 @@ compute_mixed_join_output_size(table_view const& left_equality, // Left, left anti, and full all return all the row indices from left // with a corresponding NULL from the right. case join_kind::LEFT_JOIN: - case join_kind::LEFT_ANTI_JOIN: case join_kind::FULL_JOIN: { thrust::fill(matches_per_row->begin(), matches_per_row->end(), 1); return {left_num_rows, std::move(matches_per_row)}; } // Inner and left semi joins return empty output because no matches can exist. - case join_kind::INNER_JOIN: - case join_kind::LEFT_SEMI_JOIN: { + case join_kind::INNER_JOIN: { thrust::fill(matches_per_row->begin(), matches_per_row->end(), 0); return {0, std::move(matches_per_row)}; } @@ -336,9 +331,7 @@ compute_mixed_join_output_size(table_view const& left_equality, switch (join_type) { // Left, left anti, left semi, and inner joins all return empty sets. 
case join_kind::LEFT_JOIN: - case join_kind::LEFT_ANTI_JOIN: - case join_kind::INNER_JOIN: - case join_kind::LEFT_SEMI_JOIN: { + case join_kind::INNER_JOIN: { thrust::fill(matches_per_row->begin(), matches_per_row->end(), 0); return {0, std::move(matches_per_row)}; } @@ -397,7 +390,6 @@ compute_mixed_join_output_size(table_view const& left_equality, // Allocate storage for the counter used to get the size of the join output rmm::device_scalar size(0, stream, mr); - CHECK_CUDA(stream.value()); // Determine number of output rows without actually building the output to simply // find what the size of the output will be. @@ -430,7 +422,6 @@ compute_mixed_join_output_size(table_view const& left_equality, size.data(), matches_per_row_span); } - CHECK_CUDA(stream.value()); return {size.value(stream), std::move(matches_per_row)}; } diff --git a/cpp/src/join/mixed_join_common_utils.cuh b/cpp/src/join/mixed_join_common_utils.cuh new file mode 100644 index 00000000000..60c909702ab --- /dev/null +++ b/cpp/src/join/mixed_join_common_utils.cuh @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include + +#include +#include + +#include +#include + +#include + +namespace cudf { +namespace detail { + +/** + * @brief Equality comparator for use with cuco map methods that require expression evaluation. 
+ * + * This class just defines the construction of the class and the necessary + * attributes, specifically the equality operator for the non-conditional parts + * of the operator and the evaluator used for the conditional. + */ +template +struct expression_equality { + __device__ expression_equality( + cudf::ast::detail::expression_evaluator const& evaluator, + cudf::ast::detail::IntermediateDataType* thread_intermediate_storage, + bool const swap_tables, + row_equality const& equality_probe) + : evaluator{evaluator}, + thread_intermediate_storage{thread_intermediate_storage}, + swap_tables{swap_tables}, + equality_probe{equality_probe} + { + } + + cudf::ast::detail::IntermediateDataType* thread_intermediate_storage; + cudf::ast::detail::expression_evaluator const& evaluator; + bool const swap_tables; + row_equality const& equality_probe; +}; + +/** + * @brief Equality comparator for cuco::static_map queries. + * + * This equality comparator is designed for use with cuco::static_map's APIs. A + * probe hit indicates that the hashes of the keys are equal, at which point + * this comparator checks whether the keys themselves are equal (using the + * provided equality_probe) and then evaluates the conditional expression + */ +template +struct single_expression_equality : expression_equality { + using expression_equality::expression_equality; + + // The parameters are build/probe rather than left/right because the operator + // is called by cuco's kernels with parameters in this order (note that this + // is an implementation detail that we should eventually stop relying on by + // defining operators with suitable heterogeneous typing). Rather than + // converting to left/right semantics, we can operate directly on build/probe + // until we get to the expression evaluator, which needs to convert back to + // left/right semantics because the conditional expression need not be + // commutative. + // TODO: The input types should really be size_type. 
+ __device__ __forceinline__ bool operator()(hash_value_type const build_row_index, + hash_value_type const probe_row_index) const noexcept + { + auto output_dest = cudf::ast::detail::value_expression_result(); + // Two levels of checks: + // 1. The contents of the columns involved in the equality condition are equal. + // 2. The predicate evaluated on the relevant columns (already encoded in the evaluator) + // evaluates to true. + if (this->equality_probe(probe_row_index, build_row_index)) { + auto const lrow_idx = this->swap_tables ? build_row_index : probe_row_index; + auto const rrow_idx = this->swap_tables ? probe_row_index : build_row_index; + this->evaluator.evaluate(output_dest, + static_cast(lrow_idx), + static_cast(rrow_idx), + 0, + this->thread_intermediate_storage); + return (output_dest.is_valid() && output_dest.value()); + } + return false; + } +}; + +/** + * @brief Equality comparator for cuco::static_multimap queries. + * + * This equality comparator is designed for use with cuco::static_multimap's + * pair* APIs, which will compare equality based on comparing (key, value) + * pairs. In the context of joins, these pairs are of the form + * (row_hash, row_id). A hash probe hit indicates that hash of a probe row's hash is + * equal to the hash of the hash of some row in the multimap, at which point we need an + * equality comparator that will check whether the contents of the rows are + * identical. This comparator does so by verifying key equality (i.e. that + * probe_row_hash == build_row_hash) and then using a row_equality_comparator + * to compare the contents of the row indices that are stored as the payload in + * the hash map. 
+ */ +template +struct pair_expression_equality : public expression_equality { + using expression_equality::expression_equality; + + // The parameters are build/probe rather than left/right because the operator + // is called by cuco's kernels with parameters in this order (note that this + // is an implementation detail that we should eventually stop relying on by + // defining operators with suitable heterogeneous typing). Rather than + // converting to left/right semantics, we can operate directly on build/probe + // until we get to the expression evaluator, which needs to convert back to + // left/right semantics because the conditional expression need not be + // commutative. + __device__ __forceinline__ bool operator()(pair_type const& build_row, + pair_type const& probe_row) const noexcept + { + auto output_dest = cudf::ast::detail::value_expression_result(); + // Three levels of checks: + // 1. Row hashes of the columns involved in the equality condition are equal. + // 2. The contents of the columns involved in the equality condition are equal. + // 3. The predicate evaluated on the relevant columns (already encoded in the evaluator) + // evaluates to true. + if ((probe_row.first == build_row.first) && + this->equality_probe(probe_row.second, build_row.second)) { + auto const lrow_idx = this->swap_tables ? build_row.second : probe_row.second; + auto const rrow_idx = this->swap_tables ? probe_row.second : build_row.second; + this->evaluator.evaluate( + output_dest, lrow_idx, rrow_idx, 0, this->thread_intermediate_storage); + return (output_dest.is_valid() && output_dest.value()); + } + return false; + } +}; + +} // namespace detail + +} // namespace cudf diff --git a/cpp/src/join/mixed_join_kernels.cu b/cpp/src/join/mixed_join_kernels.cu new file mode 100644 index 00000000000..5638f0ddd38 --- /dev/null +++ b/cpp/src/join/mixed_join_kernels.cu @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include +#include + +namespace cudf { +namespace detail { +namespace cg = cooperative_groups; + +template +__global__ void mixed_join(table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::mixed_multimap_type::device_view hash_table_view, + size_type* join_output_l, + size_type* join_output_r, + cudf::ast::detail::expression_device_view device_expression_data, + cudf::size_type const* join_result_offsets, + bool const swap_tables) +{ + // Normally the casting of a shared memory array is used to create multiple + // arrays of different types from the shared memory buffer, but here it is + // used to circumvent conflicts between arrays of different types between + // different template instantiations due to the extern specifier. 
+ extern __shared__ char raw_intermediate_storage[]; + cudf::ast::detail::IntermediateDataType* intermediate_storage = + reinterpret_cast*>(raw_intermediate_storage); + auto thread_intermediate_storage = + &intermediate_storage[threadIdx.x * device_expression_data.num_intermediates]; + + cudf::size_type const left_num_rows = left_table.num_rows(); + cudf::size_type const right_num_rows = right_table.num_rows(); + auto const outer_num_rows = (swap_tables ? right_num_rows : left_num_rows); + + cudf::size_type outer_row_index = threadIdx.x + blockIdx.x * block_size; + + auto evaluator = cudf::ast::detail::expression_evaluator( + left_table, right_table, device_expression_data); + + row_hash hash_probe{nullate::DYNAMIC{has_nulls}, probe}; + auto const empty_key_sentinel = hash_table_view.get_empty_key_sentinel(); + make_pair_function pair_func{hash_probe, empty_key_sentinel}; + + if (outer_row_index < outer_num_rows) { + // Figure out the number of elements for this key. + cg::thread_block_tile<1> this_thread = cg::this_thread(); + // Figure out the number of elements for this key. + auto query_pair = pair_func(outer_row_index); + auto equality = pair_expression_equality{ + evaluator, thread_intermediate_storage, swap_tables, equality_probe}; + + auto probe_key_begin = thrust::make_discard_iterator(); + auto probe_value_begin = swap_tables ? join_output_r + join_result_offsets[outer_row_index] + : join_output_l + join_result_offsets[outer_row_index]; + auto contained_key_begin = thrust::make_discard_iterator(); + auto contained_value_begin = swap_tables ? 
join_output_l + join_result_offsets[outer_row_index] + : join_output_r + join_result_offsets[outer_row_index]; + + if (join_type == join_kind::LEFT_JOIN || join_type == join_kind::FULL_JOIN) { + hash_table_view.pair_retrieve_outer(this_thread, + query_pair, + probe_key_begin, + probe_value_begin, + contained_key_begin, + contained_value_begin, + equality); + } else { + hash_table_view.pair_retrieve(this_thread, + query_pair, + probe_key_begin, + probe_value_begin, + contained_key_begin, + contained_value_begin, + equality); + } + } +} + +template __global__ void mixed_join( + table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::mixed_multimap_type::device_view hash_table_view, + size_type* join_output_l, + size_type* join_output_r, + cudf::ast::detail::expression_device_view device_expression_data, + cudf::size_type const* join_result_offsets, + bool const swap_tables); + +template __global__ void mixed_join( + table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::mixed_multimap_type::device_view hash_table_view, + size_type* join_output_l, + size_type* join_output_r, + cudf::ast::detail::expression_device_view device_expression_data, + cudf::size_type const* join_result_offsets, + bool const swap_tables); + +} // namespace detail + +} // namespace cudf diff --git a/cpp/src/join/mixed_join_kernels.cuh b/cpp/src/join/mixed_join_kernels.cuh index 9812d4c4b7d..18d5e22fd1c 100644 --- a/cpp/src/join/mixed_join_kernels.cuh +++ b/cpp/src/join/mixed_join_kernels.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2022, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,93 +16,15 @@ #pragma once -#include #include +#include -#include #include -#include #include #include -#include - -#include - -#include -#include -#include -#include -#include namespace cudf { namespace detail { -namespace cg = cooperative_groups; - -/** - * @brief Device functor to determine if two pairs are identical. - * - * This equality comparator is designed for use with cuco::static_multimap's - * pair* APIs, which will compare equality based on comparing (key, value) - * pairs. In the context of joins, these pairs are of the form - * (row_hash, row_id). A hash probe hit indicates that hash of a probe row's hash is - * equal to the hash of the hash of some row in the multimap, at which point we need an - * equality comparator that will check whether the contents of the rows are - * identical. This comparator does so by verifying key equality (i.e. that - * probe_row_hash == build_row_hash) and then using a row_equality_comparator - * to compare the contents of the row indices that are stored as the payload in - * the hash map. - * - * This particular comparator is a specialized version of the pair_equality used in hash joins. This - * version also checks the expression_evaluator. 
- */ -template -class pair_expression_equality { - public: - __device__ pair_expression_equality( - cudf::ast::detail::expression_evaluator const& evaluator, - cudf::ast::detail::IntermediateDataType* thread_intermediate_storage, - bool const swap_tables, - row_equality const& equality_probe) - : evaluator{evaluator}, - thread_intermediate_storage{thread_intermediate_storage}, - swap_tables{swap_tables}, - equality_probe{equality_probe} - { - } - - // The parameters are build/probe rather than left/right because the operator - // is called by cuco's kernels with parameters in this order (note that this - // is an implementation detail that we should eventually stop relying on by - // defining operators with suitable heterogeneous typing). Rather than - // converting to left/right semantics, we can operate directly on build/probe - // until we get to the expression evaluator, which needs to convert back to - // left/right semantics because the conditional expression need not be - // commutative. - __device__ __forceinline__ bool operator()(const pair_type& build_row, - const pair_type& probe_row) const noexcept - { - auto output_dest = cudf::ast::detail::value_expression_result(); - // Three levels of checks: - // 1. Row hashes of the columns involved in the equality condition are equal. - // 2. The contents of the columns involved in the equality condition are equal. - // 3. The predicate evaluated on the relevant columns (already encoded in the evaluator) - // evaluates to true. - if ((probe_row.first == build_row.first) && - equality_probe(probe_row.second, build_row.second)) { - auto const lrow_idx = swap_tables ? build_row.second : probe_row.second; - auto const rrow_idx = swap_tables ? 
probe_row.second : build_row.second; - evaluator.evaluate(output_dest, lrow_idx, rrow_idx, 0, thread_intermediate_storage); - return (output_dest.is_valid() && output_dest.value()); - } - return false; - } - - private: - cudf::ast::detail::IntermediateDataType* thread_intermediate_storage; - cudf::ast::detail::expression_evaluator const& evaluator; - bool const swap_tables; - row_equality const& equality_probe; -}; /** * @brief Computes the output size of joining the left table to the right table. @@ -146,63 +68,7 @@ __global__ void compute_mixed_join_output_size( ast::detail::expression_device_view device_expression_data, bool const swap_tables, std::size_t* output_size, - cudf::device_span matches_per_row) -{ - // The (required) extern storage of the shared memory array leads to - // conflicting declarations between different templates. The easiest - // workaround is to declare an arbitrary (here char) array type then cast it - // after the fact to the appropriate type. - extern __shared__ char raw_intermediate_storage[]; - cudf::ast::detail::IntermediateDataType* intermediate_storage = - reinterpret_cast*>(raw_intermediate_storage); - auto thread_intermediate_storage = - intermediate_storage + (threadIdx.x * device_expression_data.num_intermediates); - - std::size_t thread_counter{0}; - cudf::size_type const start_idx = threadIdx.x + blockIdx.x * block_size; - cudf::size_type const stride = block_size * gridDim.x; - cudf::size_type const left_num_rows = left_table.num_rows(); - cudf::size_type const right_num_rows = right_table.num_rows(); - auto const outer_num_rows = (swap_tables ? right_num_rows : left_num_rows); - - auto evaluator = cudf::ast::detail::expression_evaluator( - left_table, right_table, device_expression_data); - - // TODO: The hash join code assumes that nulls exist here, so I'm doing the - // same but at some point we may want to benchmark that. 
- row_hash hash_probe{nullate::DYNAMIC{has_nulls}, probe}; - auto const empty_key_sentinel = hash_table_view.get_empty_key_sentinel(); - make_pair_function pair_func{hash_probe, empty_key_sentinel}; - - for (cudf::size_type outer_row_index = start_idx; outer_row_index < outer_num_rows; - outer_row_index += stride) { - // Figure out the number of elements for this key. - cg::thread_block_tile<1> this_thread = cg::this_thread(); - auto query_pair = pair_func(outer_row_index); - // TODO: Address asymmetry in operator. - auto count_equality = pair_expression_equality{ - evaluator, thread_intermediate_storage, swap_tables, equality_probe}; - // TODO: This entire kernel probably won't work for left anti joins since I - // need to use a normal map (not a multimap), so this condition is probably - // overspecified at the moment. - if (join_type == join_kind::LEFT_JOIN || join_type == join_kind::LEFT_ANTI_JOIN || - join_type == join_kind::FULL_JOIN) { - matches_per_row[outer_row_index] = - hash_table_view.pair_count_outer(this_thread, query_pair, count_equality); - } else { - matches_per_row[outer_row_index] = - hash_table_view.pair_count(this_thread, query_pair, count_equality); - } - thread_counter += matches_per_row[outer_row_index]; - } - - using BlockReduce = cub::BlockReduce; - __shared__ typename BlockReduce::TempStorage temp_storage; - std::size_t block_counter = BlockReduce(temp_storage).Sum(thread_counter); - - // Add block counter to global counter - if (threadIdx.x == 0) atomicAdd(output_size, block_counter); -} + cudf::device_span matches_per_row); /** * @brief Performs a join using the combination of a hash lookup to identify @@ -215,7 +81,6 @@ __global__ void compute_mixed_join_output_size( * between probe and build rows. * * @tparam block_size The number of threads per block for this kernel - * @tparam output_cache_size The side of the shared memory buffer to cache join * @tparam has_nulls Whether or not the inputs may contain nulls. 
* * @param[in] left_table The left table @@ -235,11 +100,7 @@ __global__ void compute_mixed_join_output_size( * @param[in] swap_tables If true, the kernel was launched with one thread per right row and * the kernel needs to internally loop over left rows. Otherwise, loop over right rows. */ -template +template __global__ void mixed_join(table_device_view left_table, table_device_view right_table, table_device_view probe, @@ -247,75 +108,11 @@ __global__ void mixed_join(table_device_view left_table, row_equality const equality_probe, join_kind const join_type, cudf::detail::mixed_multimap_type::device_view hash_table_view, - OutputIt1 join_output_l, - OutputIt2 join_output_r, + size_type* join_output_l, + size_type* join_output_r, cudf::ast::detail::expression_device_view device_expression_data, cudf::size_type const* join_result_offsets, - bool const swap_tables) -{ - // Normally the casting of a shared memory array is used to create multiple - // arrays of different types from the shared memory buffer, but here it is - // used to circumvent conflicts between arrays of different types between - // different template instantiations due to the extern specifier. - extern __shared__ char raw_intermediate_storage[]; - cudf::ast::detail::IntermediateDataType* intermediate_storage = - reinterpret_cast*>(raw_intermediate_storage); - auto thread_intermediate_storage = - &intermediate_storage[threadIdx.x * device_expression_data.num_intermediates]; - - cudf::size_type const left_num_rows = left_table.num_rows(); - cudf::size_type const right_num_rows = right_table.num_rows(); - auto const outer_num_rows = (swap_tables ? 
right_num_rows : left_num_rows); - - cudf::size_type outer_row_index = threadIdx.x + blockIdx.x * block_size; - - auto evaluator = cudf::ast::detail::expression_evaluator( - left_table, right_table, device_expression_data); - - // TODO: The hash join code assumes that nulls exist here, so I'm doing the - // same but at some point we may want to benchmark that. - row_hash hash_probe{nullate::DYNAMIC{has_nulls}, probe}; - auto const empty_key_sentinel = hash_table_view.get_empty_key_sentinel(); - make_pair_function pair_func{hash_probe, empty_key_sentinel}; - - if (outer_row_index < outer_num_rows) { - // Figure out the number of elements for this key. - cg::thread_block_tile<1> this_thread = cg::this_thread(); - // Figure out the number of elements for this key. - auto query_pair = pair_func(outer_row_index); - auto equality = pair_expression_equality{ - evaluator, thread_intermediate_storage, swap_tables, equality_probe}; - - auto probe_key_begin = thrust::make_discard_iterator(); - auto probe_value_begin = swap_tables ? join_output_r + join_result_offsets[outer_row_index] - : join_output_l + join_result_offsets[outer_row_index]; - auto contained_key_begin = thrust::make_discard_iterator(); - auto contained_value_begin = swap_tables ? join_output_l + join_result_offsets[outer_row_index] - : join_output_r + join_result_offsets[outer_row_index]; - - // TODO: This entire kernel probably won't work for left anti joins since I - // need to use a normal map (not a multimap), so this condition is probably - // overspecified at the moment. 
- if (join_type == join_kind::LEFT_JOIN || join_type == join_kind::LEFT_ANTI_JOIN || - join_type == join_kind::FULL_JOIN) { - hash_table_view.pair_retrieve_outer(this_thread, - query_pair, - probe_key_begin, - probe_value_begin, - contained_key_begin, - contained_value_begin, - equality); - } else { - hash_table_view.pair_retrieve(this_thread, - query_pair, - probe_key_begin, - probe_value_begin, - contained_key_begin, - contained_value_begin, - equality); - } - } -} + bool const swap_tables); } // namespace detail diff --git a/cpp/src/join/mixed_join_kernels_semi.cu b/cpp/src/join/mixed_join_kernels_semi.cu new file mode 100644 index 00000000000..c8cfc9998f0 --- /dev/null +++ b/cpp/src/join/mixed_join_kernels_semi.cu @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +namespace cudf { +namespace detail { + +namespace cg = cooperative_groups; + +template +__global__ void mixed_join_semi(table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::semi_map_type::device_view hash_table_view, + size_type* join_output_l, + cudf::ast::detail::expression_device_view device_expression_data, + cudf::size_type const* join_result_offsets, + bool const swap_tables) +{ + // Normally the casting of a shared memory array is used to create multiple + // arrays of different types from the shared memory buffer, but here it is + // used to circumvent conflicts between arrays of different types between + // different template instantiations due to the extern specifier. + extern __shared__ char raw_intermediate_storage[]; + cudf::ast::detail::IntermediateDataType* intermediate_storage = + reinterpret_cast*>(raw_intermediate_storage); + auto thread_intermediate_storage = + &intermediate_storage[threadIdx.x * device_expression_data.num_intermediates]; + + cudf::size_type const left_num_rows = left_table.num_rows(); + cudf::size_type const right_num_rows = right_table.num_rows(); + auto const outer_num_rows = (swap_tables ? right_num_rows : left_num_rows); + + cudf::size_type outer_row_index = threadIdx.x + blockIdx.x * block_size; + + auto evaluator = cudf::ast::detail::expression_evaluator( + left_table, right_table, device_expression_data); + + row_hash hash_probe{nullate::DYNAMIC{has_nulls}, probe}; + + if (outer_row_index < outer_num_rows) { + // Figure out the number of elements for this key. 
+ auto equality = single_expression_equality{ + evaluator, thread_intermediate_storage, swap_tables, equality_probe}; + + if ((join_type == join_kind::LEFT_ANTI_JOIN) != + (hash_table_view.contains(outer_row_index, hash_probe, equality))) { + *(join_output_l + join_result_offsets[outer_row_index]) = outer_row_index; + } + } +} + +template __global__ void mixed_join_semi( + table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::semi_map_type::device_view hash_table_view, + size_type* join_output_l, + cudf::ast::detail::expression_device_view device_expression_data, + cudf::size_type const* join_result_offsets, + bool const swap_tables); + +template __global__ void mixed_join_semi( + table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::semi_map_type::device_view hash_table_view, + size_type* join_output_l, + cudf::ast::detail::expression_device_view device_expression_data, + cudf::size_type const* join_result_offsets, + bool const swap_tables); + +} // namespace detail + +} // namespace cudf diff --git a/cpp/src/join/mixed_join_kernels_semi.cuh b/cpp/src/join/mixed_join_kernels_semi.cuh new file mode 100644 index 00000000000..0a590f5b09a --- /dev/null +++ b/cpp/src/join/mixed_join_kernels_semi.cuh @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +#include +#include +#include + +namespace cudf { +namespace detail { + +/** + * @brief Computes the output size of joining the left table to the right table for semi/anti joins. + * + * This method probes the hash table with each row in the probe table using a + * custom equality comparator that also checks that the conditional expression + * evaluates to true between the left/right tables when a match is found + * between probe and build rows. + * + * @tparam block_size The number of threads per block for this kernel + * @tparam has_nulls Whether or not the inputs may contain nulls. + * + * @param[in] left_table The left table + * @param[in] right_table The right table + * @param[in] probe The table with which to probe the hash table for matches. + * @param[in] build The table with which the hash table was built. + * @param[in] equality_probe The equality comparator used when probing the hash table. + * @param[in] join_type The type of join to be performed + * @param[in] hash_table_view The hash table built from `build`. + * @param[in] device_expression_data Container of device data required to evaluate the desired + * expression. + * @param[in] swap_tables If true, the kernel was launched with one thread per right row and + * the kernel needs to internally loop over left rows. Otherwise, loop over right rows. 
+ * @param[out] output_size The resulting output size + * @param[out] matches_per_row The number of matches in one pair of + * equality/conditional tables for each row in the other pair of tables. If + * swap_tables is true, matches_per_row corresponds to the right_table, + * otherwise it corresponds to the left_table. Note that corresponding swap of + * left/right tables to determine which is the build table and which is the + * probe table has already happened on the host. + */ +template +__global__ void compute_mixed_join_output_size_semi( + table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::semi_map_type::device_view hash_table_view, + ast::detail::expression_device_view device_expression_data, + bool const swap_tables, + std::size_t* output_size, + cudf::device_span matches_per_row); + +/** + * @brief Performs a semi/anti join using the combination of a hash lookup to + * identify equal rows between one pair of tables and the evaluation of an + * expression containing an arbitrary expression. + * + * This method probes the hash table with each row in the probe table using a + * custom equality comparator that also checks that the conditional expression + * evaluates to true between the left/right tables when a match is found + * between probe and build rows. + * + * @tparam block_size The number of threads per block for this kernel + * @tparam has_nulls Whether or not the inputs may contain nulls. + * + * @param[in] left_table The left table + * @param[in] right_table The right table + * @param[in] probe The table with which to probe the hash table for matches. + * @param[in] build The table with which the hash table was built. + * @param[in] equality_probe The equality comparator used when probing the hash table. 
+ * @param[in] join_type The type of join to be performed + * @param[in] hash_table_view The hash table built from `build`. + * @param[out] join_output_l The left result of the join operation + * @param[in] device_expression_data Container of device data required to evaluate the desired + * expression. + * @param[in] join_result_offsets The starting indices in join_output[l|r] + * where the matches for each row begin. Equivalent to a prefix sum of + * matches_per_row. + * @param[in] swap_tables If true, the kernel was launched with one thread per right row and + * the kernel needs to internally loop over left rows. Otherwise, loop over right rows. + */ +template +__global__ void mixed_join_semi(table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::semi_map_type::device_view hash_table_view, + size_type* join_output_l, + cudf::ast::detail::expression_device_view device_expression_data, + cudf::size_type const* join_result_offsets, + bool const swap_tables); + +} // namespace detail + +} // namespace cudf diff --git a/cpp/src/join/mixed_join_semi.cu b/cpp/src/join/mixed_join_semi.cu new file mode 100644 index 00000000000..f38e653c4a6 --- /dev/null +++ b/cpp/src/join/mixed_join_semi.cu @@ -0,0 +1,569 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +namespace cudf { +namespace detail { + +namespace { +/** + * @brief Device functor to create a pair of hash value and index for a given row. + */ +struct make_pair_function_semi { + __device__ __forceinline__ cudf::detail::pair_type operator()(size_type i) const noexcept + { + // The value is irrelevant since we only ever use the hash map to check for + // membership of a particular row index. + return cuco::make_pair(i, 0); + } +}; + +/** + * @brief Equality comparator that composes two row_equality comparators. + */ +class double_row_equality { + public: + double_row_equality(row_equality equality_comparator, row_equality conditional_comparator) + : _equality_comparator{equality_comparator}, _conditional_comparator{conditional_comparator} + { + } + + __device__ bool operator()(size_type lhs_row_index, size_type rhs_row_index) const noexcept + { + return _equality_comparator(lhs_row_index, rhs_row_index) && + _conditional_comparator(lhs_row_index, rhs_row_index); + } + + private: + row_equality _equality_comparator; + row_equality _conditional_comparator; +}; + +} // namespace + +std::unique_ptr> mixed_join_semi( + table_view const& left_equality, + table_view const& right_equality, + table_view const& left_conditional, + table_view const& right_conditional, + ast::expression const& binary_predicate, + null_equality compare_nulls, + join_kind join_type, + std::optional>> output_size_data, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + CUDF_EXPECTS((join_type != join_kind::INNER_JOIN) && (join_type != join_kind::LEFT_JOIN) && + (join_type != join_kind::FULL_JOIN), + "Inner, left, and full joins should use mixed_join."); + + CUDF_EXPECTS(left_conditional.num_rows() == left_equality.num_rows(), + "The left conditional and equality tables must have the same number of 
rows."); + CUDF_EXPECTS(right_conditional.num_rows() == right_equality.num_rows(), + "The right conditional and equality tables must have the same number of rows."); + + auto const right_num_rows{right_conditional.num_rows()}; + auto const left_num_rows{left_conditional.num_rows()}; + auto const swap_tables = (join_type == join_kind::INNER_JOIN) && (right_num_rows > left_num_rows); + + // The "outer" table is the larger of the two tables. The kernels are + // launched with one thread per row of the outer table, which also means that + // it is the probe table for the hash + auto const outer_num_rows{swap_tables ? right_num_rows : left_num_rows}; + + // We can immediately filter out cases where the right table is empty. In + // some cases, we return all the rows of the left table with a corresponding + // null index for the right table; in others, we return an empty output. + if (right_num_rows == 0) { + switch (join_type) { + // Anti and semi return all the row indices from left + // with a corresponding NULL from the right. + case join_kind::LEFT_ANTI_JOIN: + return get_trivial_left_join_indices(left_conditional, stream).first; + // Inner and left semi joins return empty output because no matches can exist. + case join_kind::LEFT_SEMI_JOIN: + return std::make_unique>(0, stream, mr); + default: CUDF_FAIL("Invalid join kind."); break; + } + } else if (left_num_rows == 0) { + switch (join_type) { + // Anti and semi joins both return empty sets. + case join_kind::LEFT_ANTI_JOIN: + case join_kind::LEFT_SEMI_JOIN: + return std::make_unique>(0, stream, mr); + default: CUDF_FAIL("Invalid join kind."); break; + } + } + + // If evaluating the expression may produce null outputs we create a nullable + // output column and follow the null-supporting expression evaluation code + // path. 
+ auto const has_nulls = + cudf::has_nulls(left_equality) || cudf::has_nulls(right_equality) || + binary_predicate.may_evaluate_null(left_conditional, right_conditional, stream); + + auto const parser = ast::detail::expression_parser{ + binary_predicate, left_conditional, right_conditional, has_nulls, stream, mr}; + CUDF_EXPECTS(parser.output_type().id() == type_id::BOOL8, + "The expression must produce a boolean output."); + + // TODO: The non-conditional join impls start with a dictionary matching, + // figure out what that is and what it's needed for (and if conditional joins + // need to do the same). + auto& probe = swap_tables ? right_equality : left_equality; + auto& build = swap_tables ? left_equality : right_equality; + auto probe_view = table_device_view::create(probe, stream); + auto build_view = table_device_view::create(build, stream); + auto left_conditional_view = table_device_view::create(left_conditional, stream); + auto right_conditional_view = table_device_view::create(right_conditional, stream); + auto& build_conditional_view = swap_tables ? left_conditional_view : right_conditional_view; + row_equality equality_probe{ + cudf::nullate::DYNAMIC{has_nulls}, *probe_view, *build_view, compare_nulls}; + + semi_map_type hash_table{compute_hash_table_size(build.num_rows()), + std::numeric_limits::max(), + cudf::detail::JoinNoneValue, + detail::hash_table_allocator_type{default_allocator{}, stream}, + stream.value()}; + + // Create hash table containing all keys found in right table + // TODO: To add support for nested columns we will need to flatten in many + // places. However, this probably isn't worth adding any time soon since we + // won't be able to support AST conditions for those types anyway. 
+ auto const build_nulls = cudf::nullate::DYNAMIC{cudf::has_nulls(build)}; + row_hash const hash_build{build_nulls, *build_view}; + // Since we may see multiple rows that are identical in the equality tables + // but differ in the conditional tables, the equality comparator used for + // insertion must account for both sets of tables. An alternative solution + // would be to use a multimap, but that solution would store duplicates where + // equality and conditional rows are equal, so this approach is preferable. + // One way to make this solution even more efficient would be to only include + // the columns of the conditional table that are used by the expression, but + // that requires additional plumbing through the AST machinery and is out of + // scope for now. + row_equality equality_build_equality{build_nulls, *build_view, *build_view, compare_nulls}; + row_equality equality_build_conditional{ + build_nulls, *build_conditional_view, *build_conditional_view, compare_nulls}; + double_row_equality equality_build{equality_build_equality, equality_build_conditional}; + make_pair_function_semi pair_func_build{}; + + auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func_build); + + // skip rows that are null here. + if ((compare_nulls == null_equality::EQUAL) or (not nullable(build))) { + hash_table.insert(iter, iter + right_num_rows, hash_build, equality_build, stream.value()); + } else { + thrust::counting_iterator stencil(0); + auto const [row_bitmask, _] = cudf::detail::bitmask_and(build, stream); + row_is_valid pred{static_cast(row_bitmask.data())}; + + // insert valid rows + hash_table.insert_if( + iter, iter + right_num_rows, stencil, pred, hash_build, equality_build, stream.value()); + } + + auto hash_table_view = hash_table.get_device_view(); + + // For inner joins we support optimizing the join by launching one thread for + // whichever table is larger rather than always using the left table. 
+ detail::grid_1d const config(outer_num_rows, DEFAULT_JOIN_BLOCK_SIZE); + auto const shmem_size_per_block = parser.shmem_per_thread * config.num_threads_per_block; + join_kind const kernel_join_type = + join_type == join_kind::FULL_JOIN ? join_kind::LEFT_JOIN : join_type; + + // If the join size data was not provided as an input, compute it here. + std::size_t join_size; + // Using an optional because we only need to allocate a new vector if one was + // not passed as input, and rmm::device_uvector is not default constructible + std::optional> matches_per_row{}; + device_span matches_per_row_span{}; + + if (output_size_data.has_value()) { + join_size = output_size_data->first; + matches_per_row_span = output_size_data->second; + } else { + // Allocate storage for the counter used to get the size of the join output + rmm::device_scalar size(0, stream, mr); + + matches_per_row = + rmm::device_uvector{static_cast(outer_num_rows), stream, mr}; + // Note that the view goes out of scope after this else statement, but the + // data owned by matches_per_row stays alive so the data pointer is valid. 
+ auto mutable_matches_per_row_span = cudf::device_span{ + matches_per_row->begin(), static_cast(outer_num_rows)}; + matches_per_row_span = cudf::device_span{ + matches_per_row->begin(), static_cast(outer_num_rows)}; + if (has_nulls) { + compute_mixed_join_output_size_semi + <<>>( + *left_conditional_view, + *right_conditional_view, + *probe_view, + *build_view, + equality_probe, + kernel_join_type, + hash_table_view, + parser.device_expression_data, + swap_tables, + size.data(), + mutable_matches_per_row_span); + } else { + compute_mixed_join_output_size_semi + <<>>( + *left_conditional_view, + *right_conditional_view, + *probe_view, + *build_view, + equality_probe, + kernel_join_type, + hash_table_view, + parser.device_expression_data, + swap_tables, + size.data(), + mutable_matches_per_row_span); + } + join_size = size.value(stream); + } + + if (join_size == 0) { return std::make_unique>(0, stream, mr); } + + // Given the number of matches per row, we need to compute the offsets for insertion. 
+ auto join_result_offsets = + rmm::device_uvector{static_cast(outer_num_rows), stream, mr}; + thrust::exclusive_scan(rmm::exec_policy{stream}, + matches_per_row_span.begin(), + matches_per_row_span.end(), + join_result_offsets.begin()); + + auto left_indices = std::make_unique>(join_size, stream, mr); + auto const& join_output_l = left_indices->data(); + + if (has_nulls) { + mixed_join_semi + <<>>( + *left_conditional_view, + *right_conditional_view, + *probe_view, + *build_view, + equality_probe, + kernel_join_type, + hash_table_view, + join_output_l, + parser.device_expression_data, + join_result_offsets.data(), + swap_tables); + } else { + mixed_join_semi + <<>>( + *left_conditional_view, + *right_conditional_view, + *probe_view, + *build_view, + equality_probe, + kernel_join_type, + hash_table_view, + join_output_l, + parser.device_expression_data, + join_result_offsets.data(), + swap_tables); + } + + return left_indices; +} + +std::pair>> +compute_mixed_join_output_size_semi(table_view const& left_equality, + table_view const& right_equality, + table_view const& left_conditional, + table_view const& right_conditional, + ast::expression const& binary_predicate, + null_equality compare_nulls, + join_kind join_type, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + CUDF_EXPECTS( + (join_type != join_kind::INNER_JOIN) && (join_type != join_kind::LEFT_JOIN) && + (join_type != join_kind::FULL_JOIN), + "Inner, left, and full join size estimation should use compute_mixed_join_output_size."); + + CUDF_EXPECTS(left_conditional.num_rows() == left_equality.num_rows(), + "The left conditional and equality tables must have the same number of rows."); + CUDF_EXPECTS(right_conditional.num_rows() == right_equality.num_rows(), + "The right conditional and equality tables must have the same number of rows."); + + auto const right_num_rows{right_conditional.num_rows()}; + auto const left_num_rows{left_conditional.num_rows()}; + auto const swap_tables = 
(join_type == join_kind::INNER_JOIN) && (right_num_rows > left_num_rows); + + // The "outer" table is the larger of the two tables. The kernels are + // launched with one thread per row of the outer table, which also means that + // it is the probe table for the hash + auto const outer_num_rows{swap_tables ? right_num_rows : left_num_rows}; + + auto matches_per_row = std::make_unique>( + static_cast(outer_num_rows), stream, mr); + auto matches_per_row_span = cudf::device_span{ + matches_per_row->begin(), static_cast(outer_num_rows)}; + + // We can immediately filter out cases where one table is empty. In + // some cases, we return all the rows of the other table with a corresponding + // null index for the empty table; in others, we return an empty output. + if (right_num_rows == 0) { + switch (join_type) { + // Left, left anti, and full all return all the row indices from left + // with a corresponding NULL from the right. + case join_kind::LEFT_ANTI_JOIN: { + thrust::fill(matches_per_row->begin(), matches_per_row->end(), 1); + return {left_num_rows, std::move(matches_per_row)}; + } + // Inner and left semi joins return empty output because no matches can exist. + case join_kind::LEFT_SEMI_JOIN: return {0, std::move(matches_per_row)}; + default: CUDF_FAIL("Invalid join kind."); break; + } + } else if (left_num_rows == 0) { + switch (join_type) { + // Left, left anti, left semi, and inner joins all return empty sets. + case join_kind::LEFT_ANTI_JOIN: + case join_kind::LEFT_SEMI_JOIN: { + thrust::fill(matches_per_row->begin(), matches_per_row->end(), 0); + return {0, std::move(matches_per_row)}; + } + default: CUDF_FAIL("Invalid join kind."); break; + } + } + + // If evaluating the expression may produce null outputs we create a nullable + // output column and follow the null-supporting expression evaluation code + // path. 
+ auto const has_nulls = + cudf::has_nulls(left_equality) || cudf::has_nulls(right_equality) || + binary_predicate.may_evaluate_null(left_conditional, right_conditional, stream); + + auto const parser = ast::detail::expression_parser{ + binary_predicate, left_conditional, right_conditional, has_nulls, stream, mr}; + CUDF_EXPECTS(parser.output_type().id() == type_id::BOOL8, + "The expression must produce a boolean output."); + + // TODO: The non-conditional join impls start with a dictionary matching, + // figure out what that is and what it's needed for (and if conditional joins + // need to do the same). + auto& probe = swap_tables ? right_equality : left_equality; + auto& build = swap_tables ? left_equality : right_equality; + auto probe_view = table_device_view::create(probe, stream); + auto build_view = table_device_view::create(build, stream); + auto left_conditional_view = table_device_view::create(left_conditional, stream); + auto right_conditional_view = table_device_view::create(right_conditional, stream); + auto& build_conditional_view = swap_tables ? left_conditional_view : right_conditional_view; + row_equality equality_probe{ + cudf::nullate::DYNAMIC{has_nulls}, *probe_view, *build_view, compare_nulls}; + + semi_map_type hash_table{compute_hash_table_size(build.num_rows()), + std::numeric_limits::max(), + cudf::detail::JoinNoneValue, + detail::hash_table_allocator_type{default_allocator{}, stream}, + stream.value()}; + + // Create hash table containing all keys found in right table + // TODO: To add support for nested columns we will need to flatten in many + // places. However, this probably isn't worth adding any time soon since we + // won't be able to support AST conditions for those types anyway. 
+ auto const build_nulls = cudf::nullate::DYNAMIC{cudf::has_nulls(build)}; + row_hash const hash_build{build_nulls, *build_view}; + // Since we may see multiple rows that are identical in the equality tables + // but differ in the conditional tables, the equality comparator used for + // insertion must account for both sets of tables. An alternative solution + // would be to use a multimap, but that solution would store duplicates where + // equality and conditional rows are equal, so this approach is preferable. + // One way to make this solution even more efficient would be to only include + // the columns of the conditional table that are used by the expression, but + // that requires additional plumbing through the AST machinery and is out of + // scope for now. + row_equality equality_build_equality{build_nulls, *build_view, *build_view, compare_nulls}; + row_equality equality_build_conditional{ + build_nulls, *build_conditional_view, *build_conditional_view, compare_nulls}; + double_row_equality equality_build{equality_build_equality, equality_build_conditional}; + make_pair_function_semi pair_func_build{}; + + auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func_build); + + // skip rows that are null here. + if ((compare_nulls == null_equality::EQUAL) or (not nullable(build))) { + hash_table.insert(iter, iter + right_num_rows, hash_build, equality_build, stream.value()); + } else { + thrust::counting_iterator stencil(0); + auto const [row_bitmask, _] = cudf::detail::bitmask_and(build, stream); + row_is_valid pred{static_cast(row_bitmask.data())}; + + // insert valid rows + hash_table.insert_if( + iter, iter + right_num_rows, stencil, pred, hash_build, equality_build, stream.value()); + } + + auto hash_table_view = hash_table.get_device_view(); + + // For inner joins we support optimizing the join by launching one thread for + // whichever table is larger rather than always using the left table. 
+ detail::grid_1d const config(outer_num_rows, DEFAULT_JOIN_BLOCK_SIZE); + auto const shmem_size_per_block = parser.shmem_per_thread * config.num_threads_per_block; + + // Allocate storage for the counter used to get the size of the join output + rmm::device_scalar size(0, stream, mr); + + // Determine number of output rows without actually building the output to simply + // find what the size of the output will be. + if (has_nulls) { + compute_mixed_join_output_size_semi + <<>>( + *left_conditional_view, + *right_conditional_view, + *probe_view, + *build_view, + equality_probe, + join_type, + hash_table_view, + parser.device_expression_data, + swap_tables, + size.data(), + matches_per_row_span); + } else { + compute_mixed_join_output_size_semi + <<>>( + *left_conditional_view, + *right_conditional_view, + *probe_view, + *build_view, + equality_probe, + join_type, + hash_table_view, + parser.device_expression_data, + swap_tables, + size.data(), + matches_per_row_span); + } + + return {size.value(stream), std::move(matches_per_row)}; +} + +} // namespace detail + +std::pair>> mixed_left_semi_join_size( + table_view const& left_equality, + table_view const& right_equality, + table_view const& left_conditional, + table_view const& right_conditional, + ast::expression const& binary_predicate, + null_equality compare_nulls, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::compute_mixed_join_output_size_semi(left_equality, + right_equality, + left_conditional, + right_conditional, + binary_predicate, + compare_nulls, + detail::join_kind::LEFT_SEMI_JOIN, + rmm::cuda_stream_default, + mr); +} + +std::unique_ptr> mixed_left_semi_join( + table_view const& left_equality, + table_view const& right_equality, + table_view const& left_conditional, + table_view const& right_conditional, + ast::expression const& binary_predicate, + null_equality compare_nulls, + std::optional>> output_size_data, + rmm::mr::device_memory_resource* mr) +{ + 
CUDF_FUNC_RANGE(); + return detail::mixed_join_semi(left_equality, + right_equality, + left_conditional, + right_conditional, + binary_predicate, + compare_nulls, + detail::join_kind::LEFT_SEMI_JOIN, + output_size_data, + rmm::cuda_stream_default, + mr); +} + +std::pair>> mixed_left_anti_join_size( + table_view const& left_equality, + table_view const& right_equality, + table_view const& left_conditional, + table_view const& right_conditional, + ast::expression const& binary_predicate, + null_equality compare_nulls, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::compute_mixed_join_output_size_semi(left_equality, + right_equality, + left_conditional, + right_conditional, + binary_predicate, + compare_nulls, + detail::join_kind::LEFT_ANTI_JOIN, + rmm::cuda_stream_default, + mr); +} + +std::unique_ptr> mixed_left_anti_join( + table_view const& left_equality, + table_view const& right_equality, + table_view const& left_conditional, + table_view const& right_conditional, + ast::expression const& binary_predicate, + null_equality compare_nulls, + std::optional>> output_size_data, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::mixed_join_semi(left_equality, + right_equality, + left_conditional, + right_conditional, + binary_predicate, + compare_nulls, + detail::join_kind::LEFT_ANTI_JOIN, + output_size_data, + rmm::cuda_stream_default, + mr); +} + +} // namespace cudf diff --git a/cpp/src/join/mixed_join_size_kernels.cu b/cpp/src/join/mixed_join_size_kernels.cu new file mode 100644 index 00000000000..1a08b8792c2 --- /dev/null +++ b/cpp/src/join/mixed_join_size_kernels.cu @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include +#include + +namespace cudf { +namespace detail { +namespace cg = cooperative_groups; + +template +__global__ void compute_mixed_join_output_size( + table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::mixed_multimap_type::device_view hash_table_view, + ast::detail::expression_device_view device_expression_data, + bool const swap_tables, + std::size_t* output_size, + cudf::device_span matches_per_row) +{ + // The (required) extern storage of the shared memory array leads to + // conflicting declarations between different templates. The easiest + // workaround is to declare an arbitrary (here char) array type then cast it + // after the fact to the appropriate type. + extern __shared__ char raw_intermediate_storage[]; + cudf::ast::detail::IntermediateDataType* intermediate_storage = + reinterpret_cast*>(raw_intermediate_storage); + auto thread_intermediate_storage = + intermediate_storage + (threadIdx.x * device_expression_data.num_intermediates); + + std::size_t thread_counter{0}; + cudf::size_type const start_idx = threadIdx.x + blockIdx.x * block_size; + cudf::size_type const stride = block_size * gridDim.x; + cudf::size_type const left_num_rows = left_table.num_rows(); + cudf::size_type const right_num_rows = right_table.num_rows(); + auto const outer_num_rows = (swap_tables ? 
right_num_rows : left_num_rows); + + auto evaluator = cudf::ast::detail::expression_evaluator( + left_table, right_table, device_expression_data); + + row_hash hash_probe{nullate::DYNAMIC{has_nulls}, probe}; + auto const empty_key_sentinel = hash_table_view.get_empty_key_sentinel(); + make_pair_function pair_func{hash_probe, empty_key_sentinel}; + + // Figure out the number of elements for this key. + cg::thread_block_tile<1> this_thread = cg::this_thread(); + // TODO: Address asymmetry in operator. + auto count_equality = pair_expression_equality{ + evaluator, thread_intermediate_storage, swap_tables, equality_probe}; + + for (cudf::size_type outer_row_index = start_idx; outer_row_index < outer_num_rows; + outer_row_index += stride) { + auto query_pair = pair_func(outer_row_index); + if (join_type == join_kind::LEFT_JOIN || join_type == join_kind::FULL_JOIN) { + matches_per_row[outer_row_index] = + hash_table_view.pair_count_outer(this_thread, query_pair, count_equality); + } else { + matches_per_row[outer_row_index] = + hash_table_view.pair_count(this_thread, query_pair, count_equality); + } + thread_counter += matches_per_row[outer_row_index]; + } + + using BlockReduce = cub::BlockReduce; + __shared__ typename BlockReduce::TempStorage temp_storage; + std::size_t block_counter = BlockReduce(temp_storage).Sum(thread_counter); + + // Add block counter to global counter + if (threadIdx.x == 0) atomicAdd(output_size, block_counter); +} + +template __global__ void compute_mixed_join_output_size( + table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::mixed_multimap_type::device_view hash_table_view, + ast::detail::expression_device_view device_expression_data, + bool const swap_tables, + std::size_t* output_size, + cudf::device_span matches_per_row); + +template __global__ void compute_mixed_join_output_size( + 
table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::mixed_multimap_type::device_view hash_table_view, + ast::detail::expression_device_view device_expression_data, + bool const swap_tables, + std::size_t* output_size, + cudf::device_span matches_per_row); + +} // namespace detail + +} // namespace cudf diff --git a/cpp/src/join/mixed_join_size_kernels_semi.cu b/cpp/src/join/mixed_join_size_kernels_semi.cu new file mode 100644 index 00000000000..2c077a698f8 --- /dev/null +++ b/cpp/src/join/mixed_join_size_kernels_semi.cu @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +namespace cudf { +namespace detail { + +namespace cg = cooperative_groups; + +template +__global__ void compute_mixed_join_output_size_semi( + table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::semi_map_type::device_view hash_table_view, + ast::detail::expression_device_view device_expression_data, + bool const swap_tables, + std::size_t* output_size, + cudf::device_span matches_per_row) +{ + // The (required) extern storage of the shared memory array leads to + // conflicting declarations between different templates. The easiest + // workaround is to declare an arbitrary (here char) array type then cast it + // after the fact to the appropriate type. + extern __shared__ char raw_intermediate_storage[]; + cudf::ast::detail::IntermediateDataType* intermediate_storage = + reinterpret_cast*>(raw_intermediate_storage); + auto thread_intermediate_storage = + intermediate_storage + (threadIdx.x * device_expression_data.num_intermediates); + + std::size_t thread_counter{0}; + cudf::size_type const start_idx = threadIdx.x + blockIdx.x * block_size; + cudf::size_type const stride = block_size * gridDim.x; + cudf::size_type const left_num_rows = left_table.num_rows(); + cudf::size_type const right_num_rows = right_table.num_rows(); + auto const outer_num_rows = (swap_tables ? right_num_rows : left_num_rows); + + auto evaluator = cudf::ast::detail::expression_evaluator( + left_table, right_table, device_expression_data); + row_hash hash_probe{nullate::DYNAMIC{has_nulls}, probe}; + // TODO: Address asymmetry in operator. 
+ auto equality = single_expression_equality{ + evaluator, thread_intermediate_storage, swap_tables, equality_probe}; + + for (cudf::size_type outer_row_index = start_idx; outer_row_index < outer_num_rows; + outer_row_index += stride) { + matches_per_row[outer_row_index] = + ((join_type == join_kind::LEFT_ANTI_JOIN) != + (hash_table_view.contains(outer_row_index, hash_probe, equality))); + thread_counter += matches_per_row[outer_row_index]; + } + + using BlockReduce = cub::BlockReduce; + __shared__ typename BlockReduce::TempStorage temp_storage; + std::size_t block_counter = BlockReduce(temp_storage).Sum(thread_counter); + + // Add block counter to global counter + if (threadIdx.x == 0) atomicAdd(output_size, block_counter); +} + +template __global__ void compute_mixed_join_output_size_semi( + table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::semi_map_type::device_view hash_table_view, + ast::detail::expression_device_view device_expression_data, + bool const swap_tables, + std::size_t* output_size, + cudf::device_span matches_per_row); + +template __global__ void compute_mixed_join_output_size_semi( + table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::semi_map_type::device_view hash_table_view, + ast::detail::expression_device_view device_expression_data, + bool const swap_tables, + std::size_t* output_size, + cudf::device_span matches_per_row); + +} // namespace detail + +} // namespace cudf diff --git a/cpp/src/join/semi_join.cu b/cpp/src/join/semi_join.cu index 5eb8ca2452e..8563a2a3bd3 100644 --- a/cpp/src/join/semi_join.cu +++ b/cpp/src/join/semi_join.cu @@ -37,8 +37,6 @@ #include #include -#include - namespace cudf { namespace detail { @@ -91,13 +89,11 @@ 
std::unique_ptr> left_semi_anti_join( auto left_flattened_keys = left_flattened_tables.flattened_columns(); // Create hash table. - auto hash_table = cuco:: - static_map{ - compute_hash_table_size(right_num_rows), - std::numeric_limits::max(), - cudf::detail::JoinNoneValue, - hash_table_allocator_type{default_allocator{}, stream}, - stream.value()}; + semi_map_type hash_table{compute_hash_table_size(right_num_rows), + std::numeric_limits::max(), + cudf::detail::JoinNoneValue, + hash_table_allocator_type{default_allocator{}, stream}, + stream.value()}; // Create hash table containing all keys found in right table auto right_rows_d = table_device_view::create(right_flattened_keys, stream); diff --git a/cpp/src/lists/copying/gather.cu b/cpp/src/lists/copying/gather.cu index fe45cdfc338..8d2de8997d1 100644 --- a/cpp/src/lists/copying/gather.cu +++ b/cpp/src/lists/copying/gather.cu @@ -53,8 +53,8 @@ namespace detail { * @endcode */ struct list_gatherer { - typedef size_type argument_type; - typedef size_type result_type; + using argument_type = size_type; + using result_type = size_type; size_t offset_count; size_type const* base_offsets; diff --git a/cpp/src/partitioning/partitioning.cu b/cpp/src/partitioning/partitioning.cu index 7b3b7b0f3fd..66b26148ede 100644 --- a/cpp/src/partitioning/partitioning.cu +++ b/cpp/src/partitioning/partitioning.cu @@ -257,7 +257,7 @@ __global__ void copy_block_partitions(InputIter input_iter, reinterpret_cast(block_output + OPTIMIZED_BLOCK_SIZE * OPTIMIZED_ROWS_PER_THREAD); auto partition_offset_global = partition_offset_shared + num_partitions + 1; - typedef cub::BlockScan BlockScan; + using BlockScan = cub::BlockScan; __shared__ typename BlockScan::TempStorage temp_storage; // use ELEMENTS_PER_THREAD=2 to support upto 1024 partitions diff --git a/cpp/src/quantiles/quantiles_util.hpp b/cpp/src/quantiles/quantiles_util.hpp index 7af1e47087b..a5dc643a688 100644 --- a/cpp/src/quantiles/quantiles_util.hpp +++ 
b/cpp/src/quantiles/quantiles_util.hpp @@ -45,8 +45,8 @@ CUDF_HOST_DEVICE inline Result linear(T lhs, T rhs, double frac) // Underflow may occur when converting int64 to double // detail: https://github.com/rapidsai/cudf/issues/1417 - double dlhs = static_cast(lhs); - double drhs = static_cast(rhs); + auto dlhs = static_cast(lhs); + auto drhs = static_cast(rhs); double one_minus_frac = 1.0 - frac; return static_cast(one_minus_frac * dlhs + frac * drhs); } @@ -55,8 +55,8 @@ template CUDF_HOST_DEVICE inline Result midpoint(T lhs, T rhs) { // TODO: try std::midpoint (C++20) if available - double dlhs = static_cast(lhs); - double drhs = static_cast(rhs); + auto dlhs = static_cast(lhs); + auto drhs = static_cast(rhs); return static_cast(dlhs / 2 + drhs / 2); } diff --git a/cpp/src/rolling/rolling_detail.cuh b/cpp/src/rolling/rolling_detail.cuh index bc1947dfeed..7c52856b147 100644 --- a/cpp/src/rolling/rolling_detail.cuh +++ b/cpp/src/rolling/rolling_detail.cuh @@ -950,9 +950,9 @@ __launch_bounds__(block_size) __global__ int64_t following_window = following_window_begin[i]; // compute bounds - size_type start = static_cast( + auto start = static_cast( min(static_cast(input.size()), max(0L, i - preceding_window + 1))); - size_type end = static_cast( + auto end = static_cast( min(static_cast(input.size()), max(0L, i + following_window + 1))); size_type start_index = min(start, end); size_type end_index = max(start, end); diff --git a/cpp/src/strings/capitalize.cu b/cpp/src/strings/capitalize.cu index 9618f325fce..84ae2b73bba 100644 --- a/cpp/src/strings/capitalize.cu +++ b/cpp/src/strings/capitalize.cu @@ -108,7 +108,7 @@ struct base_fn { if (!d_chars) d_offsets[idx] = 0; } - Derived& derived = static_cast(*this); + auto& derived = static_cast(*this); auto const d_str = d_column.element(idx); offset_type bytes = 0; auto d_buffer = d_chars ? 
d_chars + d_offsets[idx] : nullptr; diff --git a/cpp/src/strings/combine/join_list_elements.cu b/cpp/src/strings/combine/join_list_elements.cu index 9482d4db9b8..8f364f5c9bc 100644 --- a/cpp/src/strings/combine/join_list_elements.cu +++ b/cpp/src/strings/combine/join_list_elements.cu @@ -61,9 +61,9 @@ struct compute_size_and_concatenate_fn { // If d_chars != nullptr: only concatenate strings. char* d_chars{nullptr}; - __device__ bool output_is_null(size_type const idx, - size_type const start_idx, - size_type const end_idx) const noexcept + [[nodiscard]] __device__ bool output_is_null(size_type const idx, + size_type const start_idx, + size_type const end_idx) const noexcept { if (func.is_null_list(lists_dv, idx)) { return true; } return empty_list_policy == output_if_empty_list::NULL_ELEMENT && start_idx == end_idx; @@ -127,13 +127,16 @@ struct compute_size_and_concatenate_fn { struct scalar_separator_fn { string_scalar_device_view const d_separator; - __device__ bool is_null_list(column_device_view const& lists_dv, - size_type const idx) const noexcept + [[nodiscard]] __device__ bool is_null_list(column_device_view const& lists_dv, + size_type const idx) const noexcept { return lists_dv.is_null(idx); } - __device__ string_view separator(size_type const) const noexcept { return d_separator.value(); } + [[nodiscard]] __device__ string_view separator(size_type const) const noexcept + { + return d_separator.value(); + } }; template @@ -222,13 +225,13 @@ struct column_separators_fn { column_device_view const separators_dv; string_scalar_device_view const sep_narep_dv; - __device__ bool is_null_list(column_device_view const& lists_dv, - size_type const idx) const noexcept + [[nodiscard]] __device__ bool is_null_list(column_device_view const& lists_dv, + size_type const idx) const noexcept { return lists_dv.is_null(idx) || (separators_dv.is_null(idx) && !sep_narep_dv.is_valid()); } - __device__ string_view separator(size_type const idx) const noexcept + [[nodiscard]] 
__device__ string_view separator(size_type const idx) const noexcept { return separators_dv.is_valid(idx) ? separators_dv.element(idx) : sep_narep_dv.value(); diff --git a/cpp/src/strings/contains.cu b/cpp/src/strings/contains.cu index 9376a0082a8..efdee65c1f6 100644 --- a/cpp/src/strings/contains.cu +++ b/cpp/src/strings/contains.cu @@ -53,7 +53,7 @@ struct contains_fn { __device__ bool operator()(size_type idx) { - if (d_strings.is_null(idx)) return 0; + if (d_strings.is_null(idx)) return false; string_view d_str = d_strings.element(idx); int32_t begin = 0; int32_t end = bmatch ? 1 // match only the beginning of the string; diff --git a/cpp/src/strings/convert/convert_datetime.cu b/cpp/src/strings/convert/convert_datetime.cu index 8d0c5704a7b..cd3dc3b46f3 100644 --- a/cpp/src/strings/convert/convert_datetime.cu +++ b/cpp/src/strings/convert/convert_datetime.cu @@ -156,7 +156,7 @@ struct format_compiler { device_span format_items() { return device_span(d_items); } - int8_t subsecond_precision() const { return specifiers.at('f'); } + [[nodiscard]] int8_t subsecond_precision() const { return specifiers.at('f'); } }; /** @@ -194,7 +194,7 @@ struct parse_datetime { * * @return `1x10^exponent` for `0 <= exponent <= 9` */ - __device__ constexpr int64_t power_of_ten(int32_t const exponent) const + [[nodiscard]] __device__ constexpr int64_t power_of_ten(int32_t const exponent) const { constexpr int64_t powers_of_ten[] = { 1L, 10L, 100L, 1000L, 10000L, 100000L, 1000000L, 10000000L, 100000000L, 1000000000L}; @@ -202,7 +202,7 @@ struct parse_datetime { } // Walk the format_items to parse the string into date/time components - __device__ timestamp_components parse_into_parts(string_view const& d_string) const + [[nodiscard]] __device__ timestamp_components parse_into_parts(string_view const& d_string) const { timestamp_components timeparts = {1970, 1, 1, 0}; // init to epoch time @@ -310,7 +310,7 @@ struct parse_datetime { return timeparts; } - __device__ int64_t 
timestamp_from_parts(timestamp_components const& timeparts) const + [[nodiscard]] __device__ int64_t timestamp_from_parts(timestamp_components const& timeparts) const { auto const ymd = // convenient chrono class handles the leap year calculations for us cuda::std::chrono::year_month_day( @@ -689,7 +689,7 @@ struct from_timestamp_base { * modulo(-1,60) -> 59 * @endcode */ - __device__ int32_t modulo_time(int64_t time, int64_t base) const + [[nodiscard]] __device__ int32_t modulo_time(int64_t time, int64_t base) const { return static_cast(((time % base) + base) % base); }; @@ -707,12 +707,12 @@ struct from_timestamp_base { * scale( 61,60) -> 1 * @endcode */ - __device__ int64_t scale_time(int64_t time, int64_t base) const + [[nodiscard]] __device__ int64_t scale_time(int64_t time, int64_t base) const { return (time - ((time < 0) * (base - 1L))) / base; }; - __device__ time_components get_time_components(int64_t tstamp) const + [[nodiscard]] __device__ time_components get_time_components(int64_t tstamp) const { time_components result = {0}; if constexpr (std::is_same_v) { return result; } @@ -855,7 +855,7 @@ struct datetime_formatter : public from_timestamp_base { } // from https://howardhinnant.github.io/date/date.html - __device__ thrust::pair get_iso_week_year( + [[nodiscard]] __device__ thrust::pair get_iso_week_year( cuda::std::chrono::year_month_day const& ymd) const { auto const days = cuda::std::chrono::sys_days(ymd); @@ -885,8 +885,8 @@ struct datetime_formatter : public from_timestamp_base { static_cast(year)); } - __device__ int8_t get_week_of_year(cuda::std::chrono::sys_days const days, - cuda::std::chrono::sys_days const start) const + [[nodiscard]] __device__ int8_t get_week_of_year(cuda::std::chrono::sys_days const days, + cuda::std::chrono::sys_days const start) const { return days < start ? 
0 diff --git a/cpp/src/strings/convert/convert_durations.cu b/cpp/src/strings/convert/convert_durations.cu index f286149ea46..66e6f31cca2 100644 --- a/cpp/src/strings/convert/convert_durations.cu +++ b/cpp/src/strings/convert/convert_durations.cu @@ -156,7 +156,7 @@ struct format_compiler { format_item const* compiled_format_items() { return d_items.data(); } - size_type items_count() const { return static_cast(d_items.size()); } + [[nodiscard]] size_type items_count() const { return static_cast(d_items.size()); } }; template diff --git a/cpp/src/strings/findall.cu b/cpp/src/strings/findall.cu index 8d96f0de415..c82ab4f81c3 100644 --- a/cpp/src/strings/findall.cu +++ b/cpp/src/strings/findall.cu @@ -67,7 +67,7 @@ struct findall_fn { string_view d_str = d_strings.element(idx); auto const nchars = d_str.length(); int32_t spos = 0; - int32_t epos = static_cast(nchars); + auto epos = static_cast(nchars); size_type column_count = 0; while (spos <= nchars) { if (prog.find(idx, d_str, spos, epos) <= 0) break; // no more matches found diff --git a/cpp/src/strings/json/json_path.cu b/cpp/src/strings/json/json_path.cu index c61fb8905f5..ae807db10e6 100644 --- a/cpp/src/strings/json/json_path.cu +++ b/cpp/src/strings/json/json_path.cu @@ -72,7 +72,7 @@ enum class parse_result { */ class parser { protected: - CUDF_HOST_DEVICE inline parser() : input(nullptr), input_len(0), pos(nullptr) {} + CUDF_HOST_DEVICE inline parser() {} CUDF_HOST_DEVICE inline parser(const char* _input, int64_t _input_len) : input(_input), input_len(_input_len), pos(_input) { @@ -177,9 +177,9 @@ class parser { } protected: - char const* input; - int64_t input_len; - char const* pos; + char const* input{nullptr}; + int64_t input_len{0}; + char const* pos{nullptr}; CUDF_HOST_DEVICE inline bool is_whitespace(char c) { return c <= ' '; } }; @@ -220,18 +220,10 @@ enum json_element_type { NONE, OBJECT, ARRAY, VALUE }; */ class json_state : private parser { public: - __device__ json_state() - : parser(), - 
cur_el_start(nullptr), - cur_el_type(json_element_type::NONE), - parent_el_type(json_element_type::NONE) - { - } + __device__ json_state() : parser() {} __device__ json_state(const char* _input, int64_t _input_len, get_json_object_options _options) : parser(_input, _input_len), - cur_el_start(nullptr), - cur_el_type(json_element_type::NONE), - parent_el_type(json_element_type::NONE), + options(_options) { } @@ -340,7 +332,7 @@ class json_state : private parser { // next parse_result result = next_element_internal(false); if (result != parse_result::SUCCESS) { return result; } - } while (1); + } while (true); return parse_result::ERROR; } @@ -486,12 +478,12 @@ class json_state : private parser { return (c == '\"') || (options.get_allow_single_quotes() && (c == '\'')); } - const char* cur_el_start; // pointer to the first character of the -value- of the current - // element - not the name - string_view cur_el_name; // name of the current element (if applicable) - json_element_type cur_el_type; // type of the current element - json_element_type parent_el_type; // parent element type - get_json_object_options options; // behavior options + const char* cur_el_start{nullptr}; // pointer to the first character of the -value- of the + // current element - not the name + string_view cur_el_name; // name of the current element (if applicable) + json_element_type cur_el_type{json_element_type::NONE}; // type of the current element + json_element_type parent_el_type{json_element_type::NONE}; // parent element type + get_json_object_options options; // behavior options }; enum class path_operator_type { ROOT, CHILD, CHILD_WILDCARD, CHILD_INDEX, ERROR, END }; @@ -501,26 +493,23 @@ enum class path_operator_type { ROOT, CHILD, CHILD_WILDCARD, CHILD_INDEX, ERROR, * an array of these operators applied to the incoming json string, */ struct path_operator { - CUDF_HOST_DEVICE inline path_operator() - : type(path_operator_type::ERROR), index(-1), expected_type{NONE} - { - } + 
CUDF_HOST_DEVICE inline path_operator() {} CUDF_HOST_DEVICE inline path_operator(path_operator_type _type, json_element_type _expected_type = NONE) - : type(_type), index(-1), expected_type{_expected_type} + : type(_type), expected_type{_expected_type} { } - path_operator_type type; // operator type + path_operator_type type{path_operator_type::ERROR}; // operator type // the expected element type we're applying this operation to. // for example: // - you cannot retrieve a subscripted field (eg [5]) from an object. // - you cannot retrieve a field by name (eg .book) from an array. // - you -can- use .* for both arrays and objects // a value of NONE imples any type accepted - json_element_type expected_type; // the expected type of the element we're working with - string_view name; // name to match against (if applicable) - int index; // index for subscript operator + json_element_type expected_type{NONE}; // the expected type of the element we're working with + string_view name; // name to match against (if applicable) + int index{-1}; // index for subscript operator }; /** diff --git a/cpp/src/strings/padding.cu b/cpp/src/strings/padding.cu index 20868077cf4..f2a27d1b11d 100644 --- a/cpp/src/strings/padding.cu +++ b/cpp/src/strings/padding.cu @@ -122,7 +122,7 @@ std::unique_ptr pad( if (d_strings.is_null(idx)) return; string_view d_str = d_strings.element(idx); char* ptr = d_chars + d_offsets[idx]; - int32_t pad = static_cast(width - d_str.length()); + auto pad = static_cast(width - d_str.length()); auto right_pad = (width & 1) ? pad / 2 : (pad - pad / 2); // odd width = right-justify auto left_pad = pad - right_pad; // e.g. 
width=7 gives "++foxx+" while width=6 gives "+fox++" diff --git a/cpp/src/strings/regex/regcomp.cpp b/cpp/src/strings/regex/regcomp.cpp index 8fbd82b8dc7..7be88d01387 100644 --- a/cpp/src/strings/regex/regcomp.cpp +++ b/cpp/src/strings/regex/regcomp.cpp @@ -20,7 +20,7 @@ #include #include -#include +#include namespace cudf { namespace strings { diff --git a/cpp/src/strings/regex/regcomp.h b/cpp/src/strings/regex/regcomp.h index 63d7933eebe..3131767de59 100644 --- a/cpp/src/strings/regex/regcomp.h +++ b/cpp/src/strings/regex/regcomp.h @@ -51,9 +51,9 @@ enum InstType { * @brief Class type for regex compiler instruction. */ struct reclass { - int32_t builtins; // bit mask identifying builtin classes + int32_t builtins{0}; // bit mask identifying builtin classes std::u32string literals; // ranges as pairs of utf-8 characters - reclass() : builtins(0) {} + reclass() {} reclass(int m) : builtins(m) {} }; @@ -99,20 +99,20 @@ class reprog { int32_t add_class(reclass cls); void set_groups_count(int32_t groups); - int32_t groups_count() const; + [[nodiscard]] int32_t groups_count() const; - const reinst* insts_data() const; - int32_t insts_count() const; + [[nodiscard]] const reinst* insts_data() const; + [[nodiscard]] int32_t insts_count() const; reinst& inst_at(int32_t id); reclass& class_at(int32_t id); - int32_t classes_count() const; + [[nodiscard]] int32_t classes_count() const; - const int32_t* starts_data() const; - int32_t starts_count() const; + [[nodiscard]] const int32_t* starts_data() const; + [[nodiscard]] int32_t starts_count() const; void set_start_inst(int32_t id); - int32_t get_start_inst() const; + [[nodiscard]] int32_t get_start_inst() const; void optimize1(); void optimize2(); diff --git a/cpp/src/strings/regex/regex.cuh b/cpp/src/strings/regex/regex.cuh index d6b8307c3fb..a9928a6bd49 100644 --- a/cpp/src/strings/regex/regex.cuh +++ b/cpp/src/strings/regex/regex.cuh @@ -132,32 +132,38 @@ class reprog_device { /** * @brief Returns the number of regex 
instructions. */ - __host__ __device__ int32_t insts_counts() const { return _insts_count; } + [[nodiscard]] __host__ __device__ int32_t insts_counts() const { return _insts_count; } /** * @brief Returns true if this is an empty program. */ - __device__ bool is_empty() const { return insts_counts() == 0 || get_inst(0)->type == END; } + [[nodiscard]] __device__ bool is_empty() const + { + return insts_counts() == 0 || get_inst(0)->type == END; + } /** * @brief Returns the number of regex groups found in the expression. */ - CUDF_HOST_DEVICE inline int32_t group_counts() const { return _num_capturing_groups; } + [[nodiscard]] CUDF_HOST_DEVICE inline int32_t group_counts() const + { + return _num_capturing_groups; + } /** * @brief Returns the regex instruction object for a given index. */ - __device__ inline reinst* get_inst(int32_t idx) const; + [[nodiscard]] __device__ inline reinst* get_inst(int32_t idx) const; /** * @brief Returns the regex class object for a given index. */ - __device__ inline reclass_device get_class(int32_t idx) const; + [[nodiscard]] __device__ inline reclass_device get_class(int32_t idx) const; /** * @brief Returns the start-instruction-ids vector. */ - __device__ inline int32_t* startinst_ids() const; + [[nodiscard]] __device__ inline int32_t* startinst_ids() const; /** * @brief Does a find evaluation using the compiled expression on the given string. 
diff --git a/cpp/src/strings/regex/regex.inl b/cpp/src/strings/regex/regex.inl index 0e11e9c1bbd..50aab8c3ac4 100644 --- a/cpp/src/strings/regex/regex.inl +++ b/cpp/src/strings/regex/regex.inl @@ -246,7 +246,7 @@ __device__ inline int32_t reprog_device::regexec( expanded = false; for (int16_t i = 0; i < jnk.list1->size; i++) { - int32_t inst_id = static_cast(jnk.list1->inst_ids[i]); + auto inst_id = static_cast(jnk.list1->inst_ids[i]); int2& range = jnk.list1->ranges[i]; const reinst* inst = get_inst(inst_id); int32_t id_activate = -1; @@ -283,7 +283,7 @@ __device__ inline int32_t reprog_device::regexec( break; case BOW: { auto codept = utf8_to_codepoint(c); - char32_t last_c = static_cast(pos ? dstr[pos - 1] : 0); + auto last_c = static_cast(pos ? dstr[pos - 1] : 0); auto last_codept = utf8_to_codepoint(last_c); bool cur_alphaNumeric = (codept < 0x010000) && IS_ALPHANUM(_codepoint_flags[codept]); bool last_alphaNumeric = @@ -296,7 +296,7 @@ __device__ inline int32_t reprog_device::regexec( } case NBOW: { auto codept = utf8_to_codepoint(c); - char32_t last_c = static_cast(pos ? dstr[pos - 1] : 0); + auto last_c = static_cast(pos ? 
dstr[pos - 1] : 0); auto last_codept = utf8_to_codepoint(last_c); bool cur_alphaNumeric = (codept < 0x010000) && IS_ALPHANUM(_codepoint_flags[codept]); bool last_alphaNumeric = @@ -323,7 +323,7 @@ __device__ inline int32_t reprog_device::regexec( bool continue_execute = true; jnk.list2->reset(); for (int16_t i = 0; continue_execute && i < jnk.list1->size; i++) { - int32_t inst_id = static_cast(jnk.list1->inst_ids[i]); + auto inst_id = static_cast(jnk.list1->inst_ids[i]); int2& range = jnk.list1->ranges[i]; const reinst* inst = get_inst(inst_id); int32_t id_activate = -1; @@ -415,11 +415,11 @@ __device__ inline int32_t reprog_device::call_regexec( auto const schar = get_inst(_startinst_id)->u1.c; auto const relists_size = relist::alloc_size(_insts_count); - u_char* listmem = reinterpret_cast(_relists_mem); // beginning of relist buffer; + auto* listmem = reinterpret_cast(_relists_mem); // beginning of relist buffer; listmem += (idx * relists_size * 2); // two relist ptrs in reljunk: - relist* list1 = new (listmem) relist(static_cast(_insts_count)); - relist* list2 = new (listmem + relists_size) relist(static_cast(_insts_count)); + auto* list1 = new (listmem) relist(static_cast(_insts_count)); + auto* list2 = new (listmem + relists_size) relist(static_cast(_insts_count)); reljunk jnk(list1, list2, stype, schar); return regexec(dstr, jnk, begin, end, group_id); diff --git a/cpp/src/strings/regex/regexec.cu b/cpp/src/strings/regex/regexec.cu index 4f93bbd6e7b..b286812226b 100644 --- a/cpp/src/strings/regex/regexec.cu +++ b/cpp/src/strings/regex/regexec.cu @@ -67,8 +67,7 @@ reprog_device::reprog_device(reprog& prog) _num_capturing_groups{prog.groups_count()}, _insts_count{prog.insts_count()}, _starts_count{prog.starts_count()}, - _classes_count{prog.classes_count()}, - _relists_mem{nullptr} + _classes_count{prog.classes_count()} { } diff --git a/cpp/src/strings/replace/replace_re.cu b/cpp/src/strings/replace/replace_re.cu index 9fd1768453a..2c594bb86a8 100644 --- 
a/cpp/src/strings/replace/replace_re.cu +++ b/cpp/src/strings/replace/replace_re.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -62,37 +62,49 @@ struct replace_regex_fn { if (!d_chars) d_offsets[idx] = 0; return; } - auto const d_str = d_strings.element(idx); - auto const nchars = d_str.length(); // number of characters in input string - auto nbytes = d_str.size_bytes(); // number of bytes in input string - auto mxn = maxrepl < 0 ? nchars : maxrepl; // max possible replaces for this string - auto in_ptr = d_str.data(); // input pointer (i) - auto out_ptr = d_chars ? d_chars + d_offsets[idx] : nullptr; // output pointer (o) - size_type lpos = 0; - int32_t begin = 0; - int32_t end = static_cast(nchars); + + auto const d_str = d_strings.element(idx); + auto nbytes = d_str.size_bytes(); // number of bytes in input string + auto mxn = maxrepl < 0 ? d_str.length() + 1 : maxrepl; // max possible replaces for this string + auto in_ptr = d_str.data(); // input pointer (i) + auto out_ptr = d_chars ? 
d_chars + d_offsets[idx] // output pointer (o) + : nullptr; + size_type last_pos = 0; + int32_t begin = 0; // these are for calling prog.find + int32_t end = -1; // matches final word-boundary if at the end of the string + // copy input to output replacing strings as we go - while (mxn-- > 0) // maximum number of replaces - { - if (prog.is_empty() || prog.find(idx, d_str, begin, end) <= 0) - break; // no more matches - auto spos = d_str.byte_offset(begin); // get offset for these - auto epos = d_str.byte_offset(end); // character position values - nbytes += d_repl.size_bytes() - (epos - spos); // compute new size - if (out_ptr) // replace - { // i:bbbbsssseeee - out_ptr = copy_and_increment(out_ptr, in_ptr + lpos, spos - lpos); // o:bbbb - out_ptr = copy_string(out_ptr, d_repl); // o:bbbbrrrrrr - // out_ptr ---^ - lpos = epos; // i:bbbbsssseeee - } // in_ptr --^ - begin = end; - end = static_cast(nchars); + while (mxn-- > 0) { // maximum number of replaces + + if (prog.is_empty() || prog.find(idx, d_str, begin, end) <= 0) { + break; // no more matches + } + + auto const start_pos = d_str.byte_offset(begin); // get offset for these + auto const end_pos = d_str.byte_offset(end); // character position values + nbytes += d_repl.size_bytes() - (end_pos - start_pos); // and compute new size + + if (out_ptr) { // replace: + // i:bbbbsssseeee + out_ptr = copy_and_increment(out_ptr, // ^ + in_ptr + last_pos, // o:bbbb + start_pos - last_pos); // ^ + out_ptr = copy_string(out_ptr, d_repl); // o:bbbbrrrrrr + // out_ptr ---^ + last_pos = end_pos; // i:bbbbsssseeee + } // in_ptr --^ + + begin = end + (begin == end); + end = -1; } - if (out_ptr) // copy the remainder - memcpy(out_ptr, in_ptr + lpos, d_str.size_bytes() - lpos); // o:bbbbrrrrrreeee - else + + if (out_ptr) { + memcpy(out_ptr, // copy the remainder + in_ptr + last_pos, // o:bbbbrrrrrreeee + d_str.size_bytes() - last_pos); // ^ ^ + } else { d_offsets[idx] = static_cast(nbytes); + } } }; diff --git 
a/cpp/src/strings/split/split.cu b/cpp/src/strings/split/split.cu index c6e52a79059..aae911e8ed6 100644 --- a/cpp/src/strings/split/split.cu +++ b/cpp/src/strings/split/split.cu @@ -547,7 +547,7 @@ std::unique_ptr
split_fn(strings_column_view const& strings_column, */ struct base_whitespace_split_tokenizer { // count the tokens only between non-whitespace characters - __device__ size_type count_tokens(size_type idx) const + [[nodiscard]] __device__ size_type count_tokens(size_type idx) const { if (d_strings.is_null(idx)) return 0; const string_view d_str = d_strings.element(idx); diff --git a/cpp/src/text/subword/data_normalizer.cu b/cpp/src/text/subword/data_normalizer.cu index 2e6dbe62cf1..f3b642132e3 100644 --- a/cpp/src/text/subword/data_normalizer.cu +++ b/cpp/src/text/subword/data_normalizer.cu @@ -250,9 +250,8 @@ __global__ void kernel_data_normalizer(unsigned char const* strings, chars_per_thread[char_for_thread] = num_new_chars; - typedef cub:: - BlockStore - BlockStore; + using BlockStore = + cub::BlockStore; __shared__ typename BlockStore::TempStorage temp_storage; // Now we perform coalesced writes back to global memory using cub. diff --git a/cpp/src/text/subword/detail/tokenizer_utils.cuh b/cpp/src/text/subword/detail/tokenizer_utils.cuh index 48ee0fc2b51..dcd241fc045 100644 --- a/cpp/src/text/subword/detail/tokenizer_utils.cuh +++ b/cpp/src/text/subword/detail/tokenizer_utils.cuh @@ -20,7 +20,7 @@ #include -#include +#include namespace nvtext { namespace detail { diff --git a/cpp/src/text/subword/load_hash_file.cu b/cpp/src/text/subword/load_hash_file.cu index 0af34eb8092..75c79381032 100644 --- a/cpp/src/text/subword/load_hash_file.cu +++ b/cpp/src/text/subword/load_hash_file.cu @@ -29,9 +29,9 @@ #include #include +#include #include #include -#include #include namespace nvtext { diff --git a/cpp/src/transform/row_bit_count.cu b/cpp/src/transform/row_bit_count.cu index ff720daa5cb..f6b10cfc583 100644 --- a/cpp/src/transform/row_bit_count.cu +++ b/cpp/src/transform/row_bit_count.cu @@ -116,17 +116,17 @@ struct column_info { * */ struct hierarchy_info { - hierarchy_info() : simple_per_row_size(0), complex_type_count(0), max_branch_depth(0) {} + hierarchy_info() 
{} // These two fields act as an optimization. If we find that the entire table // is just fixed-width types, we do not need to do the more expensive kernel call that // traverses the individual columns. So if complex_type_count is 0, we can just // return a column where every row contains the value simple_per_row_size - size_type simple_per_row_size; // in bits - size_type complex_type_count; + size_type simple_per_row_size{0}; // in bits + size_type complex_type_count{0}; // max depth of span branches present in the hierarchy. - size_type max_branch_depth; + size_type max_branch_depth{0}; }; /** diff --git a/cpp/tests/binaryop/binop-compiled-test.cpp b/cpp/tests/binaryop/binop-compiled-test.cpp index 0339d52dda9..00408741653 100644 --- a/cpp/tests/binaryop/binop-compiled-test.cpp +++ b/cpp/tests/binaryop/binop-compiled-test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -475,6 +475,64 @@ TYPED_TEST(BinaryOperationCompiledTest_Logical, LogicalOr_Vector_Vector) this->template test(cudf::binary_operator::LOGICAL_OR); } +template +using column_wrapper = std::conditional_t, + cudf::test::strings_column_wrapper, + cudf::test::fixed_width_column_wrapper>; + +template +auto NullOp_Result(column_view lhs, column_view rhs) +{ + auto [lhs_data, lhs_mask] = cudf::test::to_host(lhs); + auto [rhs_data, rhs_mask] = cudf::test::to_host(rhs); + std::vector result(lhs.size()); + std::vector result_mask; + std::transform(thrust::make_counting_iterator(0), + thrust::make_counting_iterator(lhs.size()), + result.begin(), + [&lhs_data, &lhs_mask, &rhs_data, &rhs_mask, &result_mask](auto i) -> TypeOut { + auto lhs_valid = lhs_mask.data() and cudf::bit_is_set(lhs_mask.data(), i); + auto rhs_valid = rhs_mask.data() and cudf::bit_is_set(rhs_mask.data(), i); + bool output_valid = lhs_valid or rhs_valid; + auto result = OP{}(lhs_data[i], rhs_data[i], lhs_valid, rhs_valid, output_valid); + result_mask.push_back(output_valid); + return result; + }); + return column_wrapper(result.cbegin(), result.cend(), result_mask.cbegin()); +} + +TYPED_TEST(BinaryOperationCompiledTest_Logical, NullLogicalAnd_Vector_Vector) +{ + using TypeOut = bool; + using TypeLhs = typename TestFixture::TypeLhs; + using TypeRhs = typename TestFixture::TypeRhs; + using NULL_AND = cudf::library::operation::NullLogicalAnd; + + auto lhs = lhs_random_column(col_size); + auto rhs = rhs_random_column(col_size); + auto const expected = NullOp_Result(lhs, rhs); + + auto const result = cudf::binary_operation( + lhs, rhs, cudf::binary_operator::NULL_LOGICAL_AND, data_type(type_to_id())); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(BinaryOperationCompiledTest_Logical, NullLogicalOr_Vector_Vector) +{ + using TypeOut = bool; + using TypeLhs = typename TestFixture::TypeLhs; + using TypeRhs = typename TestFixture::TypeRhs; + using NULL_OR = 
cudf::library::operation::NullLogicalOr; + + auto lhs = lhs_random_column(col_size); + auto rhs = rhs_random_column(col_size); + auto const expected = NullOp_Result(lhs, rhs); + + auto const result = cudf::binary_operation( + lhs, rhs, cudf::binary_operator::NULL_LOGICAL_OR, data_type(type_to_id())); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + // Comparison Operations ==, !=, <, >, <=, >= // nn, tt, dd, ss, dcdc using Comparison_types = cudf::test::Types, @@ -554,32 +612,6 @@ struct BinaryOperationCompiledTest_NullOps : public BinaryOperationCompiledTest< }; TYPED_TEST_SUITE(BinaryOperationCompiledTest_NullOps, Null_types); -template -using column_wrapper = std::conditional_t, - cudf::test::strings_column_wrapper, - cudf::test::fixed_width_column_wrapper>; - -template -auto NullOp_Result(column_view lhs, column_view rhs) -{ - auto [lhs_data, lhs_mask] = cudf::test::to_host(lhs); - auto [rhs_data, rhs_mask] = cudf::test::to_host(rhs); - std::vector result(lhs.size()); - std::vector result_mask; - std::transform(thrust::make_counting_iterator(0), - thrust::make_counting_iterator(lhs.size()), - result.begin(), - [&lhs_data, &lhs_mask, &rhs_data, &rhs_mask, &result_mask](auto i) -> TypeOut { - auto lhs_valid = lhs_mask.data() and cudf::bit_is_set(lhs_mask.data(), i); - auto rhs_valid = rhs_mask.data() and cudf::bit_is_set(rhs_mask.data(), i); - bool output_valid = lhs_valid or rhs_valid; - auto result = OP{}(lhs_data[i], rhs_data[i], lhs_valid, rhs_valid, output_valid); - result_mask.push_back(output_valid); - return result; - }); - return column_wrapper(result.cbegin(), result.cend(), result_mask.cbegin()); -} - TYPED_TEST(BinaryOperationCompiledTest_NullOps, NullEquals_Vector_Vector) { using TypeOut = bool; diff --git a/cpp/tests/binaryop/util/operation.h b/cpp/tests/binaryop/util/operation.h index 481e5cfd4a9..22802580cd0 100644 --- a/cpp/tests/binaryop/util/operation.h +++ b/cpp/tests/binaryop/util/operation.h @@ -1,5 +1,5 @@ /* - * Copyright 
(c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Copyright 2018-2019 BlazingDB, Inc. * Copyright 2018 Christian Noboa Mardini @@ -323,6 +323,48 @@ struct PyMod { } }; +template +struct NullLogicalAnd { + TypeOut operator()(TypeLhs x, TypeRhs y, bool lhs_valid, bool rhs_valid, bool& output_valid) const + { + if (lhs_valid && !x) { + output_valid = true; + return false; + } + if (rhs_valid && !y) { + output_valid = true; + return false; + } + if (lhs_valid && rhs_valid) { + output_valid = true; + return true; + } + output_valid = false; + return false; + } +}; + +template +struct NullLogicalOr { + TypeOut operator()(TypeLhs x, TypeRhs y, bool lhs_valid, bool rhs_valid, bool& output_valid) const + { + if (lhs_valid && x) { + output_valid = true; + return true; + } + if (rhs_valid && y) { + output_valid = true; + return true; + } + if (lhs_valid && rhs_valid) { + output_valid = true; + return false; + } + output_valid = false; + return false; + } +}; + template struct NullEquals { TypeOut operator()(TypeLhs x, TypeRhs y, bool lhs_valid, bool rhs_valid, bool& output_valid) const diff --git a/cpp/tests/column/column_view_shallow_test.cpp b/cpp/tests/column/column_view_shallow_test.cpp index 4afa96f08d7..8a742b50baa 100644 --- a/cpp/tests/column/column_view_shallow_test.cpp +++ b/cpp/tests/column/column_view_shallow_test.cpp @@ -163,7 +163,7 @@ TYPED_TEST(ColumnViewShallowTests, shallow_hash_update_data) col->set_null_mask(cudf::create_null_mask(col->size(), cudf::mask_state::ALL_VALID)); auto col_view_new = cudf::column_view{*col}; EXPECT_NE(shallow_hash(col_view), shallow_hash(col_view_new)); - col_view_new.null_count(); + [[maybe_unused]] auto const nulls = col_view_new.null_count(); EXPECT_NE(shallow_hash(col_view), shallow_hash(col_view_new)); auto col_view_new2 = cudf::column_view{*col}; EXPECT_EQ(shallow_hash(col_view_new), shallow_hash(col_view_new2)); @@ -332,7 +332,7 @@ TYPED_TEST(ColumnViewShallowTests, 
is_shallow_equivalent_update_data) col->set_null_mask(cudf::create_null_mask(col->size(), cudf::mask_state::ALL_VALID)); auto col_view_new = cudf::column_view{*col}; EXPECT_FALSE(is_shallow_equivalent(col_view, col_view_new)); - col_view_new.null_count(); + [[maybe_unused]] auto const nulls = col_view_new.null_count(); EXPECT_FALSE(is_shallow_equivalent(col_view, col_view_new)); auto col_view_new2 = cudf::column_view{*col}; EXPECT_TRUE(is_shallow_equivalent(col_view_new, col_view_new2)); diff --git a/cpp/tests/copying/concatenate_tests.cu b/cpp/tests/copying/concatenate_tests.cu index 306037e6473..a306736d131 100644 --- a/cpp/tests/copying/concatenate_tests.cu +++ b/cpp/tests/copying/concatenate_tests.cu @@ -343,7 +343,7 @@ TEST_F(OverflowTest, OverflowTest) // primitive column { - constexpr size_type size = static_cast(static_cast(1024) * 1024 * 1024); + constexpr auto size = static_cast(static_cast(1024) * 1024 * 1024); // try and concatenate 6 char columns of size 1 billion each auto many_chars = cudf::make_fixed_width_column(data_type{type_id::INT8}, size); @@ -355,7 +355,7 @@ TEST_F(OverflowTest, OverflowTest) // string column, overflow on chars { - constexpr size_type size = static_cast(static_cast(1024) * 1024 * 1024); + constexpr auto size = static_cast(static_cast(1024) * 1024 * 1024); // try and concatenate 6 string columns of with 1 billion chars in each auto offsets = cudf::test::fixed_width_column_wrapper{0, size}; @@ -370,7 +370,7 @@ TEST_F(OverflowTest, OverflowTest) // string column, overflow on offsets (rows) { - constexpr size_type size = static_cast(static_cast(1024) * 1024 * 1024); + constexpr auto size = static_cast(static_cast(1024) * 1024 * 1024); // try and concatenate 6 string columns 1 billion rows each auto many_offsets = cudf::make_fixed_width_column(data_type{type_id::INT32}, size + 1); @@ -385,8 +385,7 @@ TEST_F(OverflowTest, OverflowTest) // list, structs too long { - constexpr size_type inner_size = - static_cast(static_cast(512) * 
1024 * 1024); + constexpr auto inner_size = static_cast(static_cast(512) * 1024 * 1024); // struct std::vector> children; @@ -408,9 +407,8 @@ TEST_F(OverflowTest, OverflowTest) // struct, list child too long { - constexpr size_type inner_size = - static_cast(static_cast(512) * 1024 * 1024); - constexpr size_type size = 3; + constexpr auto inner_size = static_cast(static_cast(512) * 1024 * 1024); + constexpr size_type size = 3; // list auto offsets = cudf::test::fixed_width_column_wrapper{0, 0, 0, inner_size}; @@ -437,7 +435,7 @@ TEST_F(OverflowTest, Presliced) // primitive column { - constexpr size_type size = static_cast(static_cast(1024) * 1024 * 1024); + constexpr auto size = static_cast(static_cast(1024) * 1024 * 1024); // try and concatenate 4 char columns of size ~1/2 billion each auto many_chars = cudf::make_fixed_width_column(data_type{type_id::INT8}, size); @@ -454,7 +452,7 @@ TEST_F(OverflowTest, Presliced) // struct column { - constexpr size_type size = static_cast(static_cast(1024) * 1024 * 1024); + constexpr auto size = static_cast(static_cast(1024) * 1024 * 1024); // try and concatenate 4 char columns of size ~1/2 billion each std::vector> children; @@ -542,8 +540,7 @@ TEST_F(OverflowTest, Presliced) // list, structs too long { - constexpr size_type inner_size = - static_cast(static_cast(1024) * 1024 * 1024); + constexpr auto inner_size = static_cast(static_cast(1024) * 1024 * 1024); // struct std::vector> children; @@ -616,8 +613,7 @@ TEST_F(OverflowTest, Presliced) // struct, list child elements too long { - constexpr size_type inner_size = - static_cast(static_cast(1024) * 1024 * 1024); + constexpr auto inner_size = static_cast(static_cast(1024) * 1024 * 1024); constexpr size_type num_rows = 4; constexpr size_type list_size = inner_size / num_rows; @@ -656,7 +652,7 @@ TEST_F(OverflowTest, BigColumnsSmallSlices) // primitive column { - constexpr size_type size = static_cast(static_cast(1024) * 1024 * 1024); + constexpr auto size = 
static_cast(static_cast(1024) * 1024 * 1024); auto many_chars = cudf::make_fixed_width_column(data_type{type_id::INT8}, size); auto sliced = cudf::slice(*many_chars, {16, 32}); @@ -668,8 +664,7 @@ TEST_F(OverflowTest, BigColumnsSmallSlices) // strings column { - constexpr size_type inner_size = - static_cast(static_cast(1024) * 1024 * 1024); + constexpr auto inner_size = static_cast(static_cast(1024) * 1024 * 1024); constexpr size_type num_rows = 1024; constexpr size_type string_size = inner_size / num_rows; @@ -696,8 +691,7 @@ TEST_F(OverflowTest, BigColumnsSmallSlices) // list column { - constexpr size_type inner_size = - static_cast(static_cast(1024) * 1024 * 1024); + constexpr auto inner_size = static_cast(static_cast(1024) * 1024 * 1024); constexpr size_type num_rows = 1024; constexpr size_type list_size = inner_size / num_rows; @@ -724,8 +718,7 @@ TEST_F(OverflowTest, BigColumnsSmallSlices) // struct { - constexpr size_type inner_size = - static_cast(static_cast(1024) * 1024 * 1024); + constexpr auto inner_size = static_cast(static_cast(1024) * 1024 * 1024); constexpr size_type num_rows = 1024; constexpr size_type list_size = inner_size / num_rows; diff --git a/cpp/tests/copying/copy_tests.cpp b/cpp/tests/copying/copy_tests.cpp index 4468bc69640..2f02f4cba02 100644 --- a/cpp/tests/copying/copy_tests.cpp +++ b/cpp/tests/copying/copy_tests.cpp @@ -70,30 +70,38 @@ TYPED_TEST(CopyTest, CopyIfElseTestLong) // make sure we span at least 2 warps int num_els = 64; - bool mask[] = {1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + bool mask[] = {true, false, true, false, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, false, false, false, false, true, true, true, + true, true, true, true, true, true, false, false, false, false, true, true, true, + true, true, true, true, true, 
true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true, true}; cudf::test::fixed_width_column_wrapper mask_w(mask, mask + num_els); - bool lhs_v[] = {1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + bool lhs_v[] = {true, true, true, true, false, false, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true, true}; wrapper lhs_w({5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5}, lhs_v); - bool rhs_v[] = {1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + bool rhs_v[] = {true, true, true, true, true, true, false, false, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true, true}; wrapper rhs_w({6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6}, rhs_v); - bool exp_v[] = {1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + bool exp_v[] = {true, true, true, true, false, false, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true, true}; wrapper expected_w({5, 6, 5, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5}, @@ -309,13 +317,13 @@ TYPED_TEST(CopyTestNumeric, CopyIfElseTestScalarColumn) int num_els = 4; - bool mask[] = {1, 0, 0, 1}; + bool mask[] = {true, false, false, true}; cudf::test::fixed_width_column_wrapper mask_w(mask, mask + num_els); cudf::numeric_scalar lhs_w(5); const auto rhs = cudf::test::make_type_param_vector({6, 6, 6, 6}); - bool rhs_v[] = {1, 0, 1, 1}; + bool rhs_v[] = {true, false, true, true}; wrapper rhs_w(rhs.begin(), rhs.end(), rhs_v); const auto expected = cudf::test::make_type_param_vector({5, 6, 6, 5}); @@ -331,12 +339,12 @@ TYPED_TEST(CopyTestNumeric, CopyIfElseTestColumnScalar) int num_els = 4; - bool mask[] = {1, 0, 0, 1}; - bool mask_v[] = {1, 1, 1, 0}; + bool mask[] = {true, false, false, true}; + bool mask_v[] = {true, true, true, false}; cudf::test::fixed_width_column_wrapper mask_w(mask, mask + num_els, mask_v); const auto lhs = cudf::test::make_type_param_vector({5, 5, 5, 5}); - bool lhs_v[] = {0, 1, 1, 1}; + bool lhs_v[] = {false, true, true, true}; wrapper lhs_w(lhs.begin(), lhs.end(), lhs_v); cudf::numeric_scalar rhs_w(6); @@ -354,7 +362,7 @@ TYPED_TEST(CopyTestNumeric, CopyIfElseTestScalarScalar) int num_els = 4; - bool mask[] = {1, 0, 0, 1}; + bool mask[] = {true, false, false, true}; cudf::test::fixed_width_column_wrapper 
mask_w(mask, mask + num_els); cudf::numeric_scalar lhs_w(5); @@ -399,12 +407,12 @@ TYPED_TEST(CopyTestChrono, CopyIfElseTestScalarColumn) int num_els = 4; - bool mask[] = {1, 0, 0, 1}; + bool mask[] = {true, false, false, true}; cudf::test::fixed_width_column_wrapper mask_w(mask, mask + num_els); auto lhs_w = create_chrono_scalar{}(cudf::test::make_type_param_scalar(5), true); - bool rhs_v[] = {1, 0, 1, 1}; + bool rhs_v[] = {true, false, true, true}; wrapper rhs_w({6, 6, 6, 6}, rhs_v); wrapper expected_w({5, 6, 6, 5}, rhs_v); @@ -419,10 +427,10 @@ TYPED_TEST(CopyTestChrono, CopyIfElseTestColumnScalar) int num_els = 4; - bool mask[] = {1, 0, 0, 1}; + bool mask[] = {true, false, false, true}; cudf::test::fixed_width_column_wrapper mask_w(mask, mask + num_els); - bool lhs_v[] = {0, 1, 1, 1}; + bool lhs_v[] = {false, true, true, true}; wrapper lhs_w({5, 5, 5, 5}, lhs_v); auto rhs_w = create_chrono_scalar{}(cudf::test::make_type_param_scalar(6), true); @@ -439,7 +447,7 @@ TYPED_TEST(CopyTestChrono, CopyIfElseTestScalarScalar) int num_els = 4; - bool mask[] = {1, 0, 0, 1}; + bool mask[] = {true, false, false, true}; cudf::test::fixed_width_column_wrapper mask_w(mask, mask + num_els); auto lhs_w = create_chrono_scalar{}(cudf::test::make_type_param_scalar(5), true); @@ -477,8 +485,8 @@ TEST_F(StringsCopyIfElseTest, CopyIfElse) std::vector h_strings2{"zz", "", "yyy", "w", "ééé", "ooo"}; cudf::test::strings_column_wrapper strings2(h_strings2.begin(), h_strings2.end(), valids); - bool mask[] = {1, 1, 0, 1, 0, 1}; - bool mask_v[] = {1, 1, 1, 1, 1, 0}; + bool mask[] = {true, true, false, true, false, true}; + bool mask_v[] = {true, true, true, true, true, false}; cudf::test::fixed_width_column_wrapper mask_w(mask, mask + 6, mask_v); auto results = cudf::copy_if_else(strings1, strings2, mask_w); @@ -504,8 +512,8 @@ TEST_F(StringsCopyIfElseTest, CopyIfElseScalarColumn) std::vector h_strings2{"zz", "", "yyy", "w", "ééé", "ooo"}; cudf::test::strings_column_wrapper 
strings2(h_strings2.begin(), h_strings2.end(), valids); - bool mask[] = {1, 0, 1, 0, 1, 0}; - bool mask_v[] = {1, 1, 1, 1, 1, 0}; + bool mask[] = {true, false, true, false, true, false}; + bool mask_v[] = {true, true, true, true, true, false}; cudf::test::fixed_width_column_wrapper mask_w(mask, mask + 6, mask_v); auto results = cudf::copy_if_else(strings1, strings2, mask_w); @@ -532,7 +540,7 @@ TEST_F(StringsCopyIfElseTest, CopyIfElseColumnScalar) std::vector h_strings2{"zz", "", "yyy", "w", "ééé", "ooo"}; cudf::test::strings_column_wrapper strings2(h_strings2.begin(), h_strings2.end(), valids); - bool mask[] = {0, 1, 1, 1, 0, 1}; + bool mask[] = {false, true, true, true, false, true}; cudf::test::fixed_width_column_wrapper mask_w(mask, mask + 6); auto results = cudf::copy_if_else(strings2, strings1, mask_w); @@ -560,14 +568,14 @@ TEST_F(StringsCopyIfElseTest, CopyIfElseScalarScalar) cudf::string_scalar string2{h_string2[0], false}; constexpr cudf::size_type mask_size = 6; - bool mask[] = {1, 0, 1, 0, 1, 0}; + bool mask[] = {true, false, true, false, true, false}; cudf::test::fixed_width_column_wrapper mask_w(mask, mask + mask_size); auto results = cudf::copy_if_else(string1, string2, mask_w); std::vector h_expected; - for (cudf::size_type idx = 0; idx < static_cast(mask_size); ++idx) { - if (mask[idx]) { + for (bool idx : mask) { + if (idx) { h_expected.push_back(h_string1[0]); } else { h_expected.push_back(h_string2[0]); @@ -649,8 +657,8 @@ TEST_F(DictionaryCopyIfElseTest, ColumnColumn) cudf::test::dictionary_column_wrapper input2( h_strings2.begin(), h_strings2.end(), valids); - bool mask[] = {1, 1, 0, 1, 0, 1}; - bool mask_v[] = {1, 1, 1, 1, 1, 0}; + bool mask[] = {true, true, false, true, false, true}; + bool mask_v[] = {true, true, true, true, true, false}; cudf::test::fixed_width_column_wrapper mask_w(mask, mask + 6, mask_v); auto results = cudf::copy_if_else(input1, input2, mask_w); @@ -676,7 +684,7 @@ TEST_F(DictionaryCopyIfElseTest, ColumnScalar) 
cudf::test::dictionary_column_wrapper input2( h_strings.begin(), h_strings.end(), valids); - bool mask[] = {0, 1, 1, 1, 0, 1}; + bool mask[] = {false, true, true, true, false, true}; cudf::test::fixed_width_column_wrapper mask_w(mask, mask + 6); auto results = cudf::copy_if_else(input2, input1, mask_w); diff --git a/cpp/tests/groupby/tdigest_tests.cu b/cpp/tests/groupby/tdigest_tests.cu index 1199dfb44f2..2591f395914 100644 --- a/cpp/tests/groupby/tdigest_tests.cu +++ b/cpp/tests/groupby/tdigest_tests.cu @@ -36,8 +36,7 @@ namespace cudf { namespace test { using namespace cudf; - -typedef thrust::tuple expected_value; +using expected_value = thrust::tuple; template struct TDigestAllTypes : public cudf::test::BaseFixture { diff --git a/cpp/tests/hash_map/multimap_test.cu b/cpp/tests/hash_map/multimap_test.cu index 4a0e3807a4c..456ba951a45 100644 --- a/cpp/tests/hash_map/multimap_test.cu +++ b/cpp/tests/hash_map/multimap_test.cu @@ -66,20 +66,19 @@ class MultimapTest : public cudf::test::BaseFixture { rmm::cuda_stream_default.synchronize(); } - ~MultimapTest() {} + ~MultimapTest() override {} }; // Google Test can only do a parameterized typed-test over a single type, so we // have to nest multiple types inside of the KeyValueTypes struct above // KeyValueTypes implies key_type = type1, value_type = type2 // This list is the types across which Google Test will run our tests -typedef ::testing::Types, - KeyValueTypes, - KeyValueTypes, - KeyValueTypes, - KeyValueTypes, - KeyValueTypes> - Implementations; +using Implementations = ::testing::Types, + KeyValueTypes, + KeyValueTypes, + KeyValueTypes, + KeyValueTypes, + KeyValueTypes>; TYPED_TEST_SUITE(MultimapTest, Implementations); diff --git a/cpp/tests/hashing/hash_test.cpp b/cpp/tests/hashing/hash_test.cpp index bd6deae9dc4..da933b44b8d 100644 --- a/cpp/tests/hashing/hash_test.cpp +++ b/cpp/tests/hashing/hash_test.cpp @@ -38,7 +38,7 @@ TEST_F(HashTest, MultiValue) "The quick brown fox", "jumps over the lazy dog.", "All 
work and no play makes Jack a dull boy", - "!\"#$%&\'()*+,-./0123456789:;<=>?@[\\]^_`{|}~"}); + R"(!"#$%&'()*+,-./0123456789:;<=>?@[\]^_`{|}~)"}); using limits = std::numeric_limits; fixed_width_column_wrapper const ints_col({0, 100, -100, limits::min(), limits::max()}); @@ -71,13 +71,13 @@ TEST_F(HashTest, MultiValueNulls) "The quick brown fox", "jumps over the lazy dog.", "All work and no play makes Jack a dull boy", - "!\"#$%&\'()*+,-./0123456789:;<=>?@[\\]^_`{|}~"}, + R"(!"#$%&'()*+,-./0123456789:;<=>?@[\]^_`{|}~)"}, {0, 1, 1, 0, 1}); strings_column_wrapper const strings_col2({"different but null", "The quick brown fox", "jumps over the lazy dog.", "I am Jack's complete lack of null value", - "!\"#$%&\'()*+,-./0123456789:;<=>?@[\\]^_`{|}~"}, + R"(!"#$%&'()*+,-./0123456789:;<=>?@[\]^_`{|}~)"}, {0, 1, 1, 0, 1}); // Nulls with different values should be equal @@ -298,32 +298,36 @@ TEST_F(SparkMurmurHash3Test, MultiValueWithSeeds) // The hash values were determined by running the following Scala code in Apache Spark: // import org.apache.spark.sql.catalyst.util.DateTimeUtils // val schema = new StructType().add("structs", new StructType().add("a",IntegerType) - // .add("b",StringType).add("c",new StructType().add("x",FloatType).add("y",LongType))) + // .add("b",StringType).add("c",new StructType().add("x",FloatType).add("y",LongType))) // .add("strings",StringType).add("doubles",DoubleType).add("timestamps",TimestampType) // .add("decimal64", DecimalType(18,7)).add("longs",LongType).add("floats",FloatType) // .add("dates",DateType).add("decimal32", DecimalType(9,3)).add("ints",IntegerType) // .add("shorts",ShortType).add("bytes",ByteType).add("bools",BooleanType) + // .add("decimal128", DecimalType(38,11)) // val data = Seq( // Row(Row(0, "a", Row(0f, 0L)), "", 0.toDouble, DateTimeUtils.toJavaTimestamp(0), BigDecimal(0), // 0.toLong, 0.toFloat, DateTimeUtils.toJavaDate(0), BigDecimal(0), 0, 0.toShort, 0.toByte, - // false), + // false, BigDecimal(0)), // 
Row(Row(100, "bc", Row(100f, 100L)), "The quick brown fox", -(0.toDouble), // DateTimeUtils.toJavaTimestamp(100), BigDecimal("0.00001"), 100.toLong, -(0.toFloat), - // DateTimeUtils.toJavaDate(100), BigDecimal("0.1"), 100, 100.toShort, 100.toByte, true), + // DateTimeUtils.toJavaDate(100), BigDecimal("0.1"), 100, 100.toShort, 100.toByte, true, + // BigDecimal("0.000000001")), // Row(Row(-100, "def", Row(-100f, -100L)), "jumps over the lazy dog.", -Double.NaN, // DateTimeUtils.toJavaTimestamp(-100), BigDecimal("-0.00001"), -100.toLong, -Float.NaN, // DateTimeUtils.toJavaDate(-100), BigDecimal("-0.1"), -100, -100.toShort, -100.toByte, - // true), + // true, BigDecimal("-0.00000000001")), // Row(Row(0x12345678, "ghij", Row(Float.PositiveInfinity, 0x123456789abcdefL)), // "All work and no play makes Jack a dull boy", Double.MinValue, // DateTimeUtils.toJavaTimestamp(Long.MinValue/1000000), BigDecimal("-99999999999.9999999"), // Long.MinValue, Float.MinValue, DateTimeUtils.toJavaDate(Int.MinValue/100), - // BigDecimal("-999999.999"), Int.MinValue, Short.MinValue, Byte.MinValue, true), + // BigDecimal("-999999.999"), Int.MinValue, Short.MinValue, Byte.MinValue, true, + // BigDecimal("-9999999999999999.99999999999")), // Row(Row(-0x76543210, "klmno", Row(Float.NegativeInfinity, -0x123456789abcdefL)), // "!\"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~\ud720\ud721", Double.MaxValue, // DateTimeUtils.toJavaTimestamp(Long.MaxValue/1000000), BigDecimal("99999999999.9999999"), // Long.MaxValue, Float.MaxValue, DateTimeUtils.toJavaDate(Int.MaxValue/100), - // BigDecimal("999999.999"), Int.MaxValue, Short.MaxValue, Byte.MaxValue, false)) + // BigDecimal("999999.999"), Int.MaxValue, Short.MaxValue, Byte.MaxValue, false, + // BigDecimal("99999999999999999999999999.99999999999"))) // val df = spark.createDataFrame(sc.parallelize(data), schema) // df.columns.foreach(c => println(s"$c => ${df.select(hash(col(c))).collect.mkString(",")}")) // df.select(hash(col("*"))).collect @@ -353,8 +357,10 @@ 
TEST_F(SparkMurmurHash3Test, MultiValueWithSeeds) {933211791, 751823303, -1080202046, 1110053733, 1135925485}); fixed_width_column_wrapper const hash_bools_expected( {933211791, -559580957, -559580957, -559580957, 933211791}); + fixed_width_column_wrapper const hash_decimal128_expected( + {-783713497, -295670906, 1398487324, -52622807, -1359749815}); fixed_width_column_wrapper const hash_combined_expected( - {-1172364561, -442972638, 1213234395, 796626751, 214075225}); + {401603227, 588162166, 552160517, 1132537411, -326043017}); using double_limits = std::numeric_limits; using long_limits = std::numeric_limits; @@ -394,6 +400,13 @@ TEST_F(SparkMurmurHash3Test, MultiValueWithSeeds) fixed_width_column_wrapper const bytes_col({0, 100, -100, -128, 127}); fixed_width_column_wrapper const bools_col1({0, 1, 1, 1, 0}); fixed_width_column_wrapper const bools_col2({0, 1, 2, 255, 0}); + fixed_point_column_wrapper<__int128_t> const decimal128_col( + {static_cast<__int128>(0), + static_cast<__int128>(100), + static_cast<__int128>(-1), + (static_cast<__int128>(0xFFFFFFFFFCC4D1C3u) << 64 | 0x602F7FC318000001u), + (static_cast<__int128>(0x0785EE10D5DA46D9u) << 64 | 0x00F4369FFFFFFFFFu)}, + numeric::scale_type{-11}); constexpr auto hasher = cudf::hash_id::HASH_SPARK_MURMUR3; auto const hash_structs = cudf::hash(cudf::table_view({structs_col}), hasher, 42); @@ -410,6 +423,7 @@ TEST_F(SparkMurmurHash3Test, MultiValueWithSeeds) auto const hash_bytes = cudf::hash(cudf::table_view({bytes_col}), hasher, 42); auto const hash_bools1 = cudf::hash(cudf::table_view({bools_col1}), hasher, 42); auto const hash_bools2 = cudf::hash(cudf::table_view({bools_col2}), hasher, 42); + auto const hash_decimal128 = cudf::hash(cudf::table_view({decimal128_col}), hasher, 42); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*hash_structs, hash_structs_expected, verbosity); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*hash_strings, hash_strings_expected, verbosity); @@ -425,6 +439,7 @@ TEST_F(SparkMurmurHash3Test, MultiValueWithSeeds) 
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*hash_bytes, hash_bytes_expected, verbosity); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*hash_bools1, hash_bools_expected, verbosity); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*hash_bools2, hash_bools_expected, verbosity); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*hash_decimal128, hash_decimal128_expected, verbosity); auto const combined_table = cudf::table_view({structs_col, strings_col, @@ -438,7 +453,8 @@ TEST_F(SparkMurmurHash3Test, MultiValueWithSeeds) ints_col, shorts_col, bytes_col, - bools_col2}); + bools_col2, + decimal128_col}); auto const hash_combined = cudf::hash(combined_table, hasher, 42); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*hash_combined, hash_combined_expected, verbosity); } @@ -462,7 +478,7 @@ TEST_F(MD5HashTest, MultiValue) "A very long (greater than 128 bytes/char string) to test a multi hash-step data point in the " "MD5 hash function. This string needed to be longer.", "All work and no play makes Jack a dull boy", - "!\"#$%&\'()*+,-./0123456789:;<=>?@[\\]^_`{|}~"}); + R"(!"#$%&'()*+,-./0123456789:;<=>?@[\]^_`{|}~)"}); strings_column_wrapper const md5_string_results1({"d41d8cd98f00b204e9800998ecf8427e", "682240021651ae166d08fe2a014d5c09", @@ -509,7 +525,7 @@ TEST_F(MD5HashTest, MultiValueNulls) "A very long (greater than 128 bytes/char string) to test a multi hash-step data point in the " "MD5 hash function. This string needed to be longer.", "All work and no play makes Jack a dull boy", - "!\"#$%&\'()*+,-./0123456789:;<=>?@[\\]^_`{|}~"}, + R"(!"#$%&'()*+,-./0123456789:;<=>?@[\]^_`{|}~)"}, {1, 0, 0, 1, 0}); strings_column_wrapper const strings_col2( {"", @@ -551,7 +567,7 @@ TEST_F(MD5HashTest, StringListsNulls) "A very long (greater than 128 bytes/char string) to test a multi hash-step data point in the " "MD5 hash function. This string needed to be longer. 
It needed to be even longer.", "All work and no play makes Jack a dull boy", - "!\"#$%&\'()*+,-./0123456789:;<=>?@[\\]^_`{|}~"}); + R"(!"#$%&'()*+,-./0123456789:;<=>?@[\]^_`{|}~)"}); lists_column_wrapper strings_list_col( {{""}, diff --git a/cpp/tests/io/csv_test.cpp b/cpp/tests/io/csv_test.cpp index b7835b4d4d1..868b19254ca 100644 --- a/cpp/tests/io/csv_test.cpp +++ b/cpp/tests/io/csv_test.cpp @@ -1901,7 +1901,7 @@ class TestSource : public cudf::io::datasource { return read_size; } - size_t size() const override { return str.size(); } + [[nodiscard]] size_t size() const override { return str.size(); } }; TEST_F(CsvReaderTest, UserImplementedSource) diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index 837ac96ef21..a31cd22ee3e 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -85,8 +85,8 @@ std::unique_ptr create_random_fixed_table(cudf::size_type num_colum } std::vector> columns(num_columns); std::transform(src_cols.begin(), src_cols.end(), columns.begin(), [](column_wrapper& in) { - auto ret = in.release(); - ret->has_nulls(); + auto ret = in.release(); + [[maybe_unused]] auto nulls = ret->has_nulls(); // pre-cache the null count return ret; }); return std::make_unique(std::move(columns)); @@ -162,8 +162,8 @@ inline auto random_values(size_t size) } struct SkipRowTest { - int test_calls; - SkipRowTest(void) : test_calls(0) {} + int test_calls{0}; + SkipRowTest() {} std::unique_ptr
get_expected_result(const std::string& filepath, int skip_rows, @@ -773,12 +773,12 @@ TEST_F(OrcChunkedWriterTest, Metadata) TEST_F(OrcChunkedWriterTest, Strings) { - bool mask1[] = {1, 1, 0, 1, 1, 1, 1}; + bool mask1[] = {true, true, false, true, true, true, true}; std::vector h_strings1{"four", "score", "and", "seven", "years", "ago", "abcdefgh"}; str_col strings1(h_strings1.begin(), h_strings1.end(), mask1); table_view tbl1({strings1}); - bool mask2[] = {0, 1, 1, 1, 1, 1, 1}; + bool mask2[] = {false, true, true, true, true, true, true}; std::vector h_strings2{"ooooo", "ppppppp", "fff", "j", "cccc", "bbb", "zzzzzzzzzzz"}; str_col strings2(h_strings2.begin(), h_strings2.end(), mask2); table_view tbl2({strings2}); @@ -885,8 +885,9 @@ TYPED_TEST(OrcChunkedWriterNumericTypeTest, UnalignedSize) int num_els = 31; - bool mask[] = {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + bool mask[] = {false, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true}; T c1a[num_els]; std::fill(c1a, c1a + num_els, static_cast(5)); @@ -927,8 +928,9 @@ TYPED_TEST(OrcChunkedWriterNumericTypeTest, UnalignedSize2) int num_els = 33; - bool mask[] = {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + bool mask[] = {false, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true}; T c1a[num_els]; std::fill(c1a, c1a + num_els, static_cast(5)); diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index 9c656abb666..b45670fd265 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -76,7 +76,8 @@ std::unique_ptr create_fixed_table(cudf::size_type num_columns, columns.begin(), 
[](cudf::test::fixed_width_column_wrapper& in) { auto ret = in.release(); - ret->has_nulls(); + // pre-cache the null count + [[maybe_unused]] auto const nulls = ret->has_nulls(); return ret; }); return std::make_unique(std::move(columns)); @@ -1086,7 +1087,7 @@ class custom_test_data_sink : public cudf::io::data_sink { outfile_.write(static_cast(data), size); } - bool supports_device_write() const override { return true; } + [[nodiscard]] bool supports_device_write() const override { return true; } void device_write(void const* gpu_data, size_t size, rmm::cuda_stream_view stream) override { @@ -1413,13 +1414,13 @@ TEST_F(ParquetChunkedWriterTest, Strings) { std::vector> cols; - bool mask1[] = {1, 1, 0, 1, 1, 1, 1}; + bool mask1[] = {true, true, false, true, true, true, true}; std::vector h_strings1{"four", "score", "and", "seven", "years", "ago", "abcdefgh"}; cudf::test::strings_column_wrapper strings1(h_strings1.begin(), h_strings1.end(), mask1); cols.push_back(strings1.release()); cudf::table tbl1(std::move(cols)); - bool mask2[] = {0, 1, 1, 1, 1, 1, 1}; + bool mask2[] = {false, true, true, true, true, true, true}; std::vector h_strings2{"ooooo", "ppppppp", "fff", "j", "cccc", "bbb", "zzzzzzzzzzz"}; cudf::test::strings_column_wrapper strings2(h_strings2.begin(), h_strings2.end(), mask2); cols.push_back(strings2.release()); @@ -2052,8 +2053,9 @@ TYPED_TEST(ParquetChunkedWriterNumericTypeTest, UnalignedSize) int num_els = 31; std::vector> cols; - bool mask[] = {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + bool mask[] = {false, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true}; T c1a[num_els]; std::fill(c1a, c1a + num_els, static_cast(5)); @@ -2099,8 +2101,9 @@ TYPED_TEST(ParquetChunkedWriterNumericTypeTest, UnalignedSize2) int num_els = 33; std::vector> cols; - bool mask[] = {0, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + bool mask[] = {false, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true}; T c1a[num_els]; std::fill(c1a, c1a + num_els, static_cast(5)); @@ -2149,7 +2152,7 @@ class custom_test_memmap_sink : public cudf::io::data_sink { void host_write(void const* data, size_t size) override { mm_writer->host_write(data, size); } - bool supports_device_write() const override { return supports_device_writes; } + [[nodiscard]] bool supports_device_write() const override { return supports_device_writes; } void device_write(void const* gpu_data, size_t size, rmm::cuda_stream_view stream) override { diff --git a/cpp/tests/join/mixed_join_tests.cu b/cpp/tests/join/mixed_join_tests.cu index d6a348698b5..f9ed22150b7 100644 --- a/cpp/tests/join/mixed_join_tests.cu +++ b/cpp/tests/join/mixed_join_tests.cu @@ -641,3 +641,302 @@ TYPED_TEST(MixedFullJoinTest, Basic2) {JoinNoneValue, 1}, {JoinNoneValue, 2}}); } + +template +struct MixedJoinSingleReturnTest : public MixedJoinTest { + /* + * Perform a join of tables constructed from two input data sets according to + * verify that the outputs match the expected outputs (up to order). 
+ */ + virtual void _test(cudf::table_view left_equality, + cudf::table_view right_equality, + cudf::table_view left_conditional, + cudf::table_view right_conditional, + cudf::ast::operation predicate, + std::vector expected_outputs, + cudf::null_equality compare_nulls = cudf::null_equality::EQUAL) + { + auto [result_size, actual_counts] = this->join_size( + left_equality, right_equality, left_conditional, right_conditional, predicate, compare_nulls); + EXPECT_TRUE(result_size == expected_outputs.size()); + + auto result = this->join( + left_equality, right_equality, left_conditional, right_conditional, predicate, compare_nulls); + std::vector resulting_indices; + for (size_t i = 0; i < result->size(); ++i) { + // Note: Not trying to be terribly efficient here since these tests are + // small, otherwise a batch copy to host before constructing the tuples + // would be important. + resulting_indices.push_back(result->element(i, rmm::cuda_stream_default)); + } + std::sort(resulting_indices.begin(), resulting_indices.end()); + std::sort(expected_outputs.begin(), expected_outputs.end()); + EXPECT_TRUE( + std::equal(resulting_indices.begin(), resulting_indices.end(), expected_outputs.begin())); + } + + /* + * Perform a join of tables constructed from two input data sets according to + * the provided predicate and verify that the outputs match the expected + * outputs (up to order). + */ + void test(ColumnVector left_data, + ColumnVector right_data, + std::vector equality_columns, + std::vector conditional_columns, + cudf::ast::operation predicate, + std::vector expected_outputs) + { + // Note that we need to maintain the column wrappers otherwise the + // resulting column views will be referencing potentially invalid memory. 
+ auto [left_wrappers, + right_wrappers, + left_columns, + right_columns, + left_equality, + right_equality, + left_conditional, + right_conditional] = + this->parse_input(left_data, right_data, equality_columns, conditional_columns); + this->_test(left_equality, + right_equality, + left_conditional, + right_conditional, + predicate, + expected_outputs); + } + + /* + * Perform a join of tables constructed from two input data sets according to + * the provided predicate and verify that the outputs match the expected + * outputs (up to order). + */ + void test_nulls(NullableColumnVector left_data, + NullableColumnVector right_data, + std::vector equality_columns, + std::vector conditional_columns, + cudf::ast::operation predicate, + std::vector expected_outputs, + cudf::null_equality compare_nulls = cudf::null_equality::EQUAL) + { + // Note that we need to maintain the column wrappers otherwise the + // resulting column views will be referencing potentially invalid memory. + auto [left_wrappers, + right_wrappers, + left_columns, + right_columns, + left_equality, + right_equality, + left_conditional, + right_conditional] = + this->parse_input(left_data, right_data, equality_columns, conditional_columns); + this->_test(left_equality, + right_equality, + left_conditional, + right_conditional, + predicate, + expected_outputs, + compare_nulls); + } + + /** + * This method must be implemented by subclasses for specific types of joins. + * It should be a simply forwarding of arguments to the appropriate cudf + * mixed join API. + */ + virtual SingleJoinReturn join(cudf::table_view left_equality, + cudf::table_view right_equality, + cudf::table_view left_conditional, + cudf::table_view right_conditional, + cudf::ast::operation predicate, + cudf::null_equality compare_nulls = cudf::null_equality::EQUAL) = 0; + + /** + * This method must be implemented by subclasses for specific types of joins. 
+ * It should be a simply forwarding of arguments to the appropriate cudf + * mixed join size computation API. + */ + virtual std::pair>> join_size( + cudf::table_view left_equality, + cudf::table_view right_equality, + cudf::table_view left_conditional, + cudf::table_view right_conditional, + cudf::ast::operation predicate, + cudf::null_equality compare_nulls = cudf::null_equality::EQUAL) = 0; +}; + +/** + * Tests of mixed left semi joins. + */ +template +struct MixedLeftSemiJoinTest : public MixedJoinSingleReturnTest { + SingleJoinReturn join(cudf::table_view left_equality, + cudf::table_view right_equality, + cudf::table_view left_conditional, + cudf::table_view right_conditional, + cudf::ast::operation predicate, + cudf::null_equality compare_nulls = cudf::null_equality::EQUAL) override + { + return cudf::mixed_left_semi_join( + left_equality, right_equality, left_conditional, right_conditional, predicate, compare_nulls); + } + + std::pair>> join_size( + cudf::table_view left_equality, + cudf::table_view right_equality, + cudf::table_view left_conditional, + cudf::table_view right_conditional, + cudf::ast::operation predicate, + cudf::null_equality compare_nulls = cudf::null_equality::EQUAL) override + { + return cudf::mixed_left_semi_join_size( + left_equality, right_equality, left_conditional, right_conditional, predicate, compare_nulls); + } +}; + +TYPED_TEST_SUITE(MixedLeftSemiJoinTest, cudf::test::IntegralTypesNotBool); + +TYPED_TEST(MixedLeftSemiJoinTest, BasicEquality) +{ + this->test({{0, 1, 2}, {3, 4, 5}, {10, 20, 30}}, + {{0, 1, 3}, {5, 4, 5}, {30, 40, 50}}, + {0}, + {1, 2}, + left_zero_eq_right_zero, + {1}); +} + +TYPED_TEST(MixedLeftSemiJoinTest, BasicEqualityDuplicates) +{ + this->test({{0, 1, 2, 1}, {3, 4, 5, 6}, {10, 20, 30, 40}}, + {{0, 1, 3, 1}, {5, 4, 5, 6}, {30, 40, 50, 40}}, + {0}, + {1, 2}, + left_zero_eq_right_zero, + {1, 3}); +} + +TYPED_TEST(MixedLeftSemiJoinTest, BasicNullEqualityEqual) +{ + this->test_nulls({{{0, 1, 2}, {1, 1, 0}}, 
{{3, 4, 5}, {1, 1, 1}}, {{10, 20, 30}, {1, 1, 1}}}, + {{{0, 1, 3}, {1, 1, 0}}, {{5, 4, 5}, {1, 1, 1}}, {{30, 40, 30}, {1, 1, 1}}}, + {0}, + {1, 2}, + left_zero_eq_right_zero, + {1, 2}, + cudf::null_equality::EQUAL); +}; + +TYPED_TEST(MixedLeftSemiJoinTest, BasicNullEqualityUnequal) +{ + this->test_nulls({{{0, 1, 2}, {1, 1, 0}}, {{3, 4, 5}, {1, 1, 1}}, {{10, 20, 30}, {1, 1, 1}}}, + {{{0, 1, 3}, {1, 1, 0}}, {{5, 4, 5}, {1, 1, 1}}, {{30, 40, 30}, {1, 1, 1}}}, + {0}, + {1, 2}, + left_zero_eq_right_zero, + {1}, + cudf::null_equality::UNEQUAL); +}; + +TYPED_TEST(MixedLeftSemiJoinTest, AsymmetricEquality) +{ + this->test({{0, 2, 1}, {3, 5, 4}, {10, 30, 20}}, + {{0, 1, 3}, {5, 4, 5}, {30, 40, 50}}, + {0}, + {1, 2}, + left_zero_eq_right_zero, + {2}); +} + +TYPED_TEST(MixedLeftSemiJoinTest, AsymmetricLeftLargerEquality) +{ + this->test({{0, 2, 1, 4}, {3, 5, 4, 10}, {10, 30, 20, 100}}, + {{0, 1, 3}, {5, 4, 5}, {30, 40, 50}}, + {0}, + {1, 2}, + left_zero_eq_right_zero, + {2}); +} + +/** + * Tests of mixed left semi joins. 
+ */ +template +struct MixedLeftAntiJoinTest : public MixedJoinSingleReturnTest { + SingleJoinReturn join(cudf::table_view left_equality, + cudf::table_view right_equality, + cudf::table_view left_conditional, + cudf::table_view right_conditional, + cudf::ast::operation predicate, + cudf::null_equality compare_nulls = cudf::null_equality::EQUAL) override + { + return cudf::mixed_left_anti_join( + left_equality, right_equality, left_conditional, right_conditional, predicate, compare_nulls); + } + + std::pair>> join_size( + cudf::table_view left_equality, + cudf::table_view right_equality, + cudf::table_view left_conditional, + cudf::table_view right_conditional, + cudf::ast::operation predicate, + cudf::null_equality compare_nulls = cudf::null_equality::EQUAL) override + { + return cudf::mixed_left_anti_join_size( + left_equality, right_equality, left_conditional, right_conditional, predicate, compare_nulls); + } +}; + +TYPED_TEST_SUITE(MixedLeftAntiJoinTest, cudf::test::IntegralTypesNotBool); + +TYPED_TEST(MixedLeftAntiJoinTest, BasicEquality) +{ + this->test({{0, 1, 2}, {3, 4, 5}, {10, 20, 30}}, + {{0, 1, 3}, {5, 4, 5}, {30, 40, 50}}, + {0}, + {1, 2}, + left_zero_eq_right_zero, + {0, 2}); +} + +TYPED_TEST(MixedLeftAntiJoinTest, BasicNullEqualityEqual) +{ + this->test_nulls({{{0, 1, 2}, {1, 1, 0}}, {{3, 4, 5}, {1, 1, 1}}, {{10, 20, 30}, {1, 1, 1}}}, + {{{0, 1, 3}, {1, 1, 0}}, {{5, 4, 5}, {1, 1, 1}}, {{30, 40, 30}, {1, 1, 1}}}, + {0}, + {1, 2}, + left_zero_eq_right_zero, + {0}, + cudf::null_equality::EQUAL); +}; + +TYPED_TEST(MixedLeftAntiJoinTest, BasicNullEqualityUnequal) +{ + this->test_nulls({{{0, 1, 2}, {1, 1, 0}}, {{3, 4, 5}, {1, 1, 1}}, {{10, 20, 30}, {1, 1, 1}}}, + {{{0, 1, 3}, {1, 1, 0}}, {{5, 4, 5}, {1, 1, 1}}, {{30, 40, 30}, {1, 1, 1}}}, + {0}, + {1, 2}, + left_zero_eq_right_zero, + {0, 2}, + cudf::null_equality::UNEQUAL); +}; + +TYPED_TEST(MixedLeftAntiJoinTest, AsymmetricEquality) +{ + this->test({{0, 2, 1}, {3, 5, 4}, {10, 30, 20}}, + {{0, 1, 3}, {5, 
4, 5}, {30, 40, 50}}, + {0}, + {1, 2}, + left_zero_eq_right_zero, + {0, 1}); +} + +TYPED_TEST(MixedLeftAntiJoinTest, AsymmetricLeftLargerEquality) +{ + this->test({{0, 2, 1, 4}, {3, 5, 4, 10}, {10, 30, 20, 100}}, + {{0, 1, 3}, {5, 4, 5}, {30, 40, 50}}, + {0}, + {1, 2}, + left_zero_eq_right_zero, + {0, 1, 3}); +} diff --git a/cpp/tests/replace/replace_tests.cpp b/cpp/tests/replace/replace_tests.cpp index fe65fe0474a..7540dfd94c5 100644 --- a/cpp/tests/replace/replace_tests.cpp +++ b/cpp/tests/replace/replace_tests.cpp @@ -298,7 +298,7 @@ struct ReplaceTest : cudf::test::BaseFixture { std::srand(number_of_instantiations++); } - ~ReplaceTest() {} + ~ReplaceTest() override {} }; /** diff --git a/cpp/tests/scalar/factories_test.cpp b/cpp/tests/scalar/factories_test.cpp index 3e89e435bc0..7cd8b655231 100644 --- a/cpp/tests/scalar/factories_test.cpp +++ b/cpp/tests/scalar/factories_test.cpp @@ -114,7 +114,7 @@ TYPED_TEST(DefaultScalarFactory, TypeCast) auto numeric_s = static_cast*>(s.get()); - EXPECT_NO_THROW(numeric_s->value()); + EXPECT_NO_THROW((void)numeric_s->value()); EXPECT_FALSE(numeric_s->is_valid()); EXPECT_FALSE(s->is_valid()); } diff --git a/cpp/tests/strings/chars_types_tests.cpp b/cpp/tests/strings/chars_types_tests.cpp index ff9f79ea87f..c1552ab3f57 100644 --- a/cpp/tests/strings/chars_types_tests.cpp +++ b/cpp/tests/strings/chars_types_tests.cpp @@ -51,13 +51,20 @@ TEST_P(CharsTypes, AllTypes) "de", "\t\r\n\f "}; - bool expecteds[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, // decimal - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, // numeric - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, // digit - 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, // alpha - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, // space - 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // upper - 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0}; // lower + bool expecteds[] = {false, false, false, false, false, false, false, false, + false, false, false, false, false, 
true, false, false, // decimal + false, false, false, false, false, false, false, false, + false, true, false, true, false, true, false, false, // numeric + false, false, false, false, false, false, false, false, + false, false, false, true, false, true, false, false, // digit + true, true, false, true, false, false, false, false, + false, false, false, false, false, false, true, false, // alpha + false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, true, // space + false, false, false, true, false, false, false, false, + false, false, false, false, false, false, false, false, // upper + false, true, false, false, false, false, false, false, + false, false, false, false, false, false, true, false}; // lower auto is_parm = GetParam(); diff --git a/cpp/tests/strings/extract_tests.cpp b/cpp/tests/strings/extract_tests.cpp index 2bb1c6dac8e..516882bd8ad 100644 --- a/cpp/tests/strings/extract_tests.cpp +++ b/cpp/tests/strings/extract_tests.cpp @@ -183,7 +183,7 @@ TEST_F(StringsExtractTests, ExtractAllTest) auto results = cudf::strings::extract_all(sv, "(\\d+) (\\w+)"); - bool valids[] = {1, 1, 1, 0, 0, 0, 1}; + bool valids[] = {true, true, true, false, false, false, true}; using LCW = cudf::test::lists_column_wrapper; LCW expected({LCW{"123", "banana", "7", "eleven"}, LCW{"41", "apple"}, diff --git a/cpp/tests/strings/factories_test.cu b/cpp/tests/strings/factories_test.cu index 38f905078a7..d35cb5c3b9d 100644 --- a/cpp/tests/strings/factories_test.cu +++ b/cpp/tests/strings/factories_test.cu @@ -68,7 +68,7 @@ TEST_F(StringsFactoriesTest, CreateColumnFromPair) strings[idx] = thrust::pair{nullptr, 0}; nulls++; } else { - cudf::size_type length = (cudf::size_type)strlen(str); + auto length = (cudf::size_type)strlen(str); memcpy(h_buffer.data() + offset, str, length); strings[idx] = thrust::pair{d_buffer.data() + offset, length}; offset += length; @@ -130,7 +130,7 @@ TEST_F(StringsFactoriesTest, CreateColumnFromOffsets) 
h_null_mask = (h_null_mask << 1); const char* str = h_test_strings[idx]; if (str) { - cudf::size_type length = (cudf::size_type)strlen(str); + auto length = (cudf::size_type)strlen(str); memcpy(h_buffer.data() + offset, str, length); offset += length; h_null_mask |= 1; diff --git a/cpp/tests/strings/json_tests.cpp b/cpp/tests/strings/json_tests.cpp index dfcc646a8f6..2dfe50d2ef5 100644 --- a/cpp/tests/strings/json_tests.cpp +++ b/cpp/tests/strings/json_tests.cpp @@ -389,7 +389,7 @@ TEST_F(JsonPathTests, GetJsonObjectFilter) auto result_raw = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); auto result = drop_whitespace(*result_raw); - cudf::test::strings_column_wrapper expected_raw{"[\"0-553-21311-3\",\"0-395-19395-8\"]"}; + cudf::test::strings_column_wrapper expected_raw{R"(["0-553-21311-3","0-395-19395-8"])"}; auto expected = drop_whitespace(expected_raw); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, *expected); @@ -402,7 +402,7 @@ TEST_F(JsonPathTests, GetJsonObjectFilter) auto result = drop_whitespace(*result_raw); cudf::test::strings_column_wrapper expected_raw{ - "[\"reference\",\"fiction\",\"fiction\",\"fiction\"]"}; + R"(["reference","fiction","fiction","fiction"])"}; auto expected = drop_whitespace(expected_raw); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, *expected); @@ -415,7 +415,7 @@ TEST_F(JsonPathTests, GetJsonObjectFilter) auto result = drop_whitespace(*result_raw); cudf::test::strings_column_wrapper expected_raw{ - "[\"Sayings of the Century\",\"Sword of Honour\",\"Moby Dick\",\"The Lord of the Rings\"]"}; + R"(["Sayings of the Century","Sword of Honour","Moby Dick","The Lord of the Rings"])"}; auto expected = drop_whitespace(expected_raw); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, *expected); @@ -470,7 +470,7 @@ TEST_F(JsonPathTests, GetJsonObjectEmptyQuery) { // empty query -> null { - cudf::test::strings_column_wrapper input{"{\"a\" : \"b\"}"}; + cudf::test::strings_column_wrapper input{R"({"a" : "b"})"}; 
std::string json_path(""); auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); @@ -497,7 +497,7 @@ TEST_F(JsonPathTests, GetJsonObjectEmptyInputsAndOutputs) // returning something, but it happens to be empty. so we expect // a valid, but empty row { - cudf::test::strings_column_wrapper input{"{\"store\": { \"bicycle\" : \"\" } }"}; + cudf::test::strings_column_wrapper input{R"({"store": { "bicycle" : "" } })"}; std::string json_path("$.store.bicycle"); auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); @@ -513,7 +513,7 @@ TEST_F(JsonPathTests, GetJsonObjectIllegalQuery) // can't have more than one root operator, or a root operator anywhere other // than the beginning { - cudf::test::strings_column_wrapper input{"{\"a\": \"b\"}"}; + cudf::test::strings_column_wrapper input{R"({"a": "b"})"}; std::string json_path("$$"); auto query = [&]() { auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); @@ -523,7 +523,7 @@ TEST_F(JsonPathTests, GetJsonObjectIllegalQuery) // invalid index { - cudf::test::strings_column_wrapper input{"{\"a\": \"b\"}"}; + cudf::test::strings_column_wrapper input{R"({"a": "b"})"}; std::string json_path("$[auh46h-]"); auto query = [&]() { auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); @@ -533,7 +533,7 @@ TEST_F(JsonPathTests, GetJsonObjectIllegalQuery) // invalid index { - cudf::test::strings_column_wrapper input{"{\"a\": \"b\"}"}; + cudf::test::strings_column_wrapper input{R"({"a": "b"})"}; std::string json_path("$[[]]"); auto query = [&]() { auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); @@ -543,7 +543,7 @@ TEST_F(JsonPathTests, GetJsonObjectIllegalQuery) // negative index { - cudf::test::strings_column_wrapper input{"{\"a\": \"b\"}"}; + cudf::test::strings_column_wrapper input{R"({"a": "b"})"}; std::string json_path("$[-1]"); auto query = [&]() 
{ auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); @@ -553,7 +553,7 @@ TEST_F(JsonPathTests, GetJsonObjectIllegalQuery) // child operator with no name specified { - cudf::test::strings_column_wrapper input{"{\"a\": \"b\"}"}; + cudf::test::strings_column_wrapper input{R"({"a": "b"})"}; std::string json_path("."); auto query = [&]() { auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); @@ -562,7 +562,7 @@ TEST_F(JsonPathTests, GetJsonObjectIllegalQuery) } { - cudf::test::strings_column_wrapper input{"{\"a\": \"b\"}"}; + cudf::test::strings_column_wrapper input{R"({"a": "b"})"}; std::string json_path("]["); auto query = [&]() { auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); @@ -571,7 +571,7 @@ TEST_F(JsonPathTests, GetJsonObjectIllegalQuery) } { - cudf::test::strings_column_wrapper input{"{\"a\": \"b\"}"}; + cudf::test::strings_column_wrapper input{R"({"a": "b"})"}; std::string json_path("6hw6,56i3"); auto query = [&]() { auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); @@ -585,7 +585,7 @@ TEST_F(JsonPathTests, GetJsonObjectInvalidQuery) { // non-existent field { - cudf::test::strings_column_wrapper input{"{\"a\": \"b\"}"}; + cudf::test::strings_column_wrapper input{R"({"a": "b"})"}; std::string json_path("$[*].c"); auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); @@ -596,7 +596,7 @@ TEST_F(JsonPathTests, GetJsonObjectInvalidQuery) // non-existent field { - cudf::test::strings_column_wrapper input{"{\"a\": \"b\"}"}; + cudf::test::strings_column_wrapper input{R"({"a": "b"})"}; std::string json_path("$[*].c[2]"); auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path); @@ -667,8 +667,8 @@ TEST_F(JsonPathTests, MixedOutput) // clang-format off cudf::test::strings_column_wrapper expected({ - "{\"b\" : \"c\"}", - "{\"b\" : \"c\"}", + 
R"({"b" : "c"})", + R"({"b" : "c"})", "", "[\"y\",500]", "", @@ -786,7 +786,7 @@ TEST_F(JsonPathTests, StripQuotes) // a valid, but empty row { - cudf::test::strings_column_wrapper input{"{\"store\": { \"bicycle\" : \"\" } }"}; + cudf::test::strings_column_wrapper input{R"({"store": { "bicycle" : "" } })"}; std::string json_path("$.store.bicycle"); cudf::strings::get_json_object_options options; @@ -858,8 +858,8 @@ TEST_F(JsonPathTests, AllowSingleQuotes) // clang-format off cudf::test::strings_column_wrapper expected({ - "{\'b\' : \'c\'}", - "{\'b\' : \"c\"}", + R"({'b' : 'c'})", + R"({'b' : "c"})", "", "[\'y\',500]", "", @@ -902,7 +902,7 @@ TEST_F(JsonPathTests, StringsWithSpecialChars) // clang-format off cudf::test::strings_column_wrapper expected({ - "[{\"key\" : \"value[\"}]", + R"([{"key" : "value["}])", }); // clang-format on CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); @@ -928,7 +928,7 @@ TEST_F(JsonPathTests, StringsWithSpecialChars) // clang-format off cudf::test::strings_column_wrapper expected({ - "[}{}][][{[\\\"}}[\\\"]", + R"([}{}][][{[\"}}[\"])", }); // clang-format on CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); @@ -961,8 +961,8 @@ TEST_F(JsonPathTests, EscapeSequences) // clang-format off cudf::test::strings_column_wrapper expected({ - "\\\" \\\\ \\/ \\b \\f \\n \\r \\t", - "\\u1248 \\uacdf \\uACDF \\u10EF" + R"(\" \\ \/ \b \f \n \r \t)", + R"(\u1248 \uacdf \uACDF \u10EF)" }); // clang-format on CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); diff --git a/cpp/tests/strings/replace_regex_tests.cpp b/cpp/tests/strings/replace_regex_tests.cpp index eac06fa4588..ddbd9f5b3d6 100644 --- a/cpp/tests/strings/replace_regex_tests.cpp +++ b/cpp/tests/strings/replace_regex_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -145,6 +145,16 @@ TEST_F(StringsReplaceRegexTest, MultiReplacement) CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, input); } +TEST_F(StringsReplaceRegexTest, WordBoundary) +{ + cudf::test::strings_column_wrapper input({"aba bcd\naba", "zéz", "A1B2-é3", "e é"}); + auto results = + cudf::strings::replace_re(cudf::strings_column_view(input), "\\b", cudf::string_scalar("X")); + cudf::test::strings_column_wrapper expected( + {"XabaX XbcdX\nXabaX", "XzézX", "XA1B2X-Xé3X", "XeX XéX"}); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); +} + TEST_F(StringsReplaceRegexTest, Multiline) { auto const multiline = cudf::strings::regex_flags::MULTILINE; diff --git a/cpp/tests/table/table_view_tests.cu b/cpp/tests/table/table_view_tests.cu index c94963525a0..a1c0c49a881 100644 --- a/cpp/tests/table/table_view_tests.cu +++ b/cpp/tests/table/table_view_tests.cu @@ -123,7 +123,7 @@ TEST_F(TableViewTest, SelectOutOfBounds) fixed_width_column_wrapper col4{{4, 5, 6, 7}}; cudf::table_view t{{col1, col2}}; - EXPECT_THROW(t.select({2, 3, 4}), std::out_of_range); + EXPECT_THROW((void)t.select({2, 3, 4}), std::out_of_range); } TEST_F(TableViewTest, SelectNoColumns) diff --git a/docker/package_versions.sh b/docker/package_versions.sh deleted file mode 100755 index c558f66d511..00000000000 --- a/docker/package_versions.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env bash -# Usage: -# "./package_versions.sh /cudf/conda/environments/cudf_dev.yml" - Updates package versions in file based on Docker build-args - -FILENAME=$1 - -set_version() { - sed -i "s/\- $1\([<>=][^a-zA-Z]*\)\?$/\- $1=$2/" $FILENAME -} - -replace_text() { - sed -i "s/$1/$2/" $FILENAME -} - -add_package() { - sed -i "s/\- $1\([<>=][^a-zA-Z]*\)\?$/a \- $2=$3/" $FILENAME -} - -if [ "$PYTHON_VERSION" ]; then - PACKAGE_NAME="python" - set_version "$PACKAGE_NAME" "$PYTHON_VERSION" -fi - -if [ "$NUMBA_VERSION" ]; then - PACKAGE_NAME="numba" - set_version "$PACKAGE_NAME" "$NUMBA_VERSION" -fi - -if [ "$PANDAS_VERSION" ]; 
then - PACKAGE_NAME="pandas" - set_version "$PACKAGE_NAME" "$PANDAS_VERSION" -fi - -if [ "$PYARROW_VERSION" ]; then - PACKAGE_NAME="pyarrow" - set_version "$PACKAGE_NAME" "$PYARROW_VERSION" -fi - -if [ "$CYTHON_VERSION" ]; then - PACKAGE_NAME="cython" - set_version "$PACKAGE_NAME" "$CYTHON_VERSION" -fi - -if [ "$CMAKE_VERSION" ]; then - PACKAGE_NAME="cmake" - set_version "$PACKAGE_NAME" "$CMAKE_VERSION" -fi - -if [ "$NUMPY_VERSION" ]; then - ABOVE_PACKAGE="pandas" - PACKAGE_NAME="numpy" - add_package "$ABOVE_PACKAGE" "$PACKAGE_NAME" "$NUMPY_VERSION" -fi diff --git a/docker_build/Dockerfile b/docker_build/Dockerfile deleted file mode 100644 index 696a6969778..00000000000 --- a/docker_build/Dockerfile +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. - -ARG CUDA_VERSION=11.2.2 -FROM nvidia/cuda:${CUDA_VERSION}-devel -ENV CUDA_SHORT_VERSION=11.2 - -SHELL ["/bin/bash", "-c"] -ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/lib:/repos/dist/lib - -ENV DEBIAN_FRONTEND=noninteractive - -ENV CUDA_HOME=/usr/local/cuda -ENV CUDA_PATH=$CUDA_HOME -ENV PATH=${CUDA_HOME}/lib64/:${PATH}:${CUDA_HOME}/bin - -# Build env variables for arrow -ENV CMAKE_BUILD_TYPE=release -ENV PYARROW_WITH_PARQUET=1 -ENV PYARROW_WITH_CUDA=1 -ENV PYARROW_WITH_ORC=1 -ENV PYARROW_WITH_DATASET=1 - -ENV ARROW_HOME=/repos/dist - -# Build env variables for rmm -ENV INSTALL_PREFIX=/usr - - -RUN apt update -y --fix-missing && \ - apt upgrade -y && \ - apt install -y --no-install-recommends software-properties-common && \ - add-apt-repository ppa:deadsnakes/ppa && \ - apt update -y --fix-missing - -RUN apt install -y --no-install-recommends \ - git \ - python3.8-dev \ - build-essential \ - autoconf \ - bison \ - flex \ - libjemalloc-dev \ - wget \ - libssl-dev \ - protobuf-compiler && \ - apt-get autoremove -y && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* && \ - update-alternatives --install /usr/bin/python python 
/usr/bin/python3.8 1 && \ - wget https://bootstrap.pypa.io/get-pip.py && \ - python get-pip.py - -# Install cmake -RUN version=3.18 && build=5 && mkdir ~/temp && cd ~/temp && wget https://cmake.org/files/v$version/cmake-$version.$build.tar.gz && \ - tar -xzvf cmake-$version.$build.tar.gz && cd cmake-$version.$build/ && ./bootstrap && make -j$(nproc) && make install - -# Install arrow from source -RUN git clone https://github.com/apache/arrow.git /repos/arrow && mkdir /repos/dist/ && cd /repos/arrow && git checkout apache-arrow-1.0.1 && git submodule init && \ - git submodule update && export PARQUET_TEST_DATA="${PWD}/cpp/submodules/parquet-testing/data" && export ARROW_TEST_DATA="${PWD}/testing/data" && \ - cd /repos/arrow/cpp && mkdir release && cd /repos/arrow/cpp/release && pip install -r /repos/arrow/python/requirements-build.txt && \ - cmake -DCMAKE_INSTALL_PREFIX=$ARROW_HOME -DCMAKE_INSTALL_LIBDIR=lib -DARROW_FLIGHT=ON -DARROW_GANDIVA=OFF -DARROW_ORC=ON -DARROW_WITH_BZ2=ON -DARROW_WITH_ZLIB=ON -DARROW_WITH_ZSTD=ON -DARROW_WITH_LZ4=ON -DARROW_WITH_SNAPPY=ON -DARROW_WITH_BROTLI=ON -DARROW_PARQUET=ON -DARROW_PYTHON=ON -DARROW_PLASMA=ON -DARROW_BUILD_TESTS=ON -DARROW_CUDA=ON -DARROW_DATASET=ON .. && \ - make -j$(nproc) && make install && cd /repos/arrow/python/ && python setup.py build_ext --build-type=release bdist_wheel && pip install /repos/arrow/python/dist/*.whl - - -# Install rmm from source -RUN cd /repos/ && git clone https://github.com/rapidsai/rmm.git && cd /repos/rmm/ && ./build.sh librmm && pip install /repos/rmm/python/. - -ADD . /repos/cudf/ - -# Build env for CUDF build -ENV CUDF_HOME=/repos/cudf/ -ENV CUDF_ROOT=/repos/cudf/cpp/build/ - -# Install cudf from source -RUN cd /repos/cudf/ && git submodule update --init --recursive && ./build.sh libcudf && \ - pip install /repos/cudf/python/cudf/. 
- diff --git a/docs/cudf/source/api_docs/groupby.rst b/docs/cudf/source/api_docs/groupby.rst index 575d7442cdf..190978a7581 100644 --- a/docs/cudf/source/api_docs/groupby.rst +++ b/docs/cudf/source/api_docs/groupby.rst @@ -34,6 +34,7 @@ Function application SeriesGroupBy.aggregate DataFrameGroupBy.aggregate GroupBy.pipe + GroupBy.transform Computations / descriptive stats -------------------------------- diff --git a/docs/cudf/source/basics/groupby.rst b/docs/cudf/source/basics/groupby.rst index f3269768025..cbc8f7e712f 100644 --- a/docs/cudf/source/basics/groupby.rst +++ b/docs/cudf/source/basics/groupby.rst @@ -1,3 +1,5 @@ +.. _basics.groupby: + GroupBy ======= @@ -220,6 +222,27 @@ Limitations .. |describe| replace:: ``describe`` .. _describe: https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#flexible-apply + +Transform +--------- + +The ``.transform()`` method aggregates per group, and broadcasts the +result to the group size, resulting in a Series/DataFrame that is of +the same size as the input Series/DataFrame. + +.. code:: python + + >>> import cudf + >>> df = cudf.DataFrame({'a': [2, 1, 1, 2, 2], 'b': [1, 2, 3, 4, 5]}) + >>> df.groupby('a').transform('max') + b + 0 5 + 1 3 + 2 3 + 3 5 + 4 5 + + Rolling window calculations --------------------------- diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index 2c184252192..3d6d3ceb399 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -80,9 +80,9 @@ # built documents. # # The short X.Y version. -version = '22.02' +version = '22.04' # The full version, including alpha/beta/rc tags. -release = '22.02.00' +release = '22.04.00' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/fetch_rapids.cmake b/fetch_rapids.cmake index e1e0d5ef2da..4c7a8d4e449 100644 --- a/fetch_rapids.cmake +++ b/fetch_rapids.cmake @@ -11,7 +11,7 @@ # or implied. 
See the License for the specific language governing permissions and limitations under # the License. # ============================================================================= -file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-22.02/RAPIDS.cmake +file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-22.04/RAPIDS.cmake ${CMAKE_BINARY_DIR}/RAPIDS.cmake ) include(${CMAKE_BINARY_DIR}/RAPIDS.cmake) diff --git a/java/ci/README.md b/java/ci/README.md index 0e947b62511..f022bec04e3 100644 --- a/java/ci/README.md +++ b/java/ci/README.md @@ -34,7 +34,7 @@ nvidia-docker run -it cudf-build:11.5.0-devel-centos7 bash You can download the cuDF repo in the docker container or you can mount it into the container. Here I choose to download again in the container. ```bash -git clone --recursive https://github.com/rapidsai/cudf.git -b branch-22.02 +git clone --recursive https://github.com/rapidsai/cudf.git -b branch-22.04 ``` ### Build cuDF jar with devtoolset @@ -47,5 +47,5 @@ scl enable devtoolset-9 "java/ci/build-in-docker.sh" ### The output -You can find the cuDF jar in java/target/ like cudf-22.02.0-SNAPSHOT-cuda11.jar. +You can find the cuDF jar in java/target/ like cudf-22.04.0-SNAPSHOT-cuda11.jar. diff --git a/java/pom.xml b/java/pom.xml index ec6968ca761..8f0fb1000d8 100755 --- a/java/pom.xml +++ b/java/pom.xml @@ -21,7 +21,7 @@ ai.rapids cudf - 22.02.0-SNAPSHOT + 22.04.0-SNAPSHOT cudfjni diff --git a/java/src/main/java/ai/rapids/cudf/BinaryOp.java b/java/src/main/java/ai/rapids/cudf/BinaryOp.java index 8b58d8383b4..15b8d32d6da 100644 --- a/java/src/main/java/ai/rapids/cudf/BinaryOp.java +++ b/java/src/main/java/ai/rapids/cudf/BinaryOp.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2020,2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -49,8 +49,10 @@ public enum BinaryOp { GREATER_EQUAL(25), // >= NULL_EQUALS(26), // like EQUAL but NULL == NULL is TRUE and NULL == not NULL is FALSE NULL_MAX(27), // MAX but NULL < not NULL - NULL_MIN(28); // MIN but NULL > not NULL + NULL_MIN(28), // MIN but NULL > not NULL //NOT IMPLEMENTED YET GENERIC_BINARY(29); + NULL_LOGICAL_AND(30), + NULL_LOGICAL_OR(31); static final EnumSet COMPARISON = EnumSet.of( diff --git a/java/src/main/java/ai/rapids/cudf/HostColumnVector.java b/java/src/main/java/ai/rapids/cudf/HostColumnVector.java index e21a4ac81c6..0fe7d7a5df8 100644 --- a/java/src/main/java/ai/rapids/cudf/HostColumnVector.java +++ b/java/src/main/java/ai/rapids/cudf/HostColumnVector.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -1136,6 +1136,8 @@ private void appendChildOrNull(ColumnBuilder childBuilder, Object listElement) { childBuilder.append((Short) listElement); } else if (listElement instanceof BigDecimal) { childBuilder.append((BigDecimal) listElement); + } else if (listElement instanceof BigInteger) { + childBuilder.append((BigInteger) listElement); } else if (listElement instanceof List) { childBuilder.append((List) listElement); } else if (listElement instanceof StructData) { @@ -1230,18 +1232,20 @@ public final ColumnBuilder append(boolean value) { return this; } - public final ColumnBuilder append(BigDecimal value) { + public ColumnBuilder append(BigDecimal value) { + return append(value.setScale(-type.getScale(), RoundingMode.UNNECESSARY).unscaledValue()); + } + + public ColumnBuilder append(BigInteger unscaledVal) { growBuffersAndRows(false, currentIndex * type.getSizeInBytes() + type.getSizeInBytes()); assert currentIndex < rows; - // Rescale input decimal with UNNECESSARY policy, which accepts no precision loss. 
- BigInteger unscaledVal = value.setScale(-type.getScale(), RoundingMode.UNNECESSARY).unscaledValue(); if (type.typeId == DType.DTypeEnum.DECIMAL32) { data.setInt(currentIndex * type.getSizeInBytes(), unscaledVal.intValueExact()); } else if (type.typeId == DType.DTypeEnum.DECIMAL64) { data.setLong(currentIndex * type.getSizeInBytes(), unscaledVal.longValueExact()); } else if (type.typeId == DType.DTypeEnum.DECIMAL128) { assert currentIndex < rows; - byte[] unscaledValueBytes = value.unscaledValue().toByteArray(); + byte[] unscaledValueBytes = unscaledVal.toByteArray(); byte[] result = convertDecimal128FromJavaToCudf(unscaledValueBytes); data.setBytes(currentIndex*DType.DTypeEnum.DECIMAL128.sizeInBytes, result, 0, result.length); } else { diff --git a/java/src/main/native/CMakeLists.txt b/java/src/main/native/CMakeLists.txt index 2db37d57cbb..00747efff27 100755 --- a/java/src/main/native/CMakeLists.txt +++ b/java/src/main/native/CMakeLists.txt @@ -26,7 +26,7 @@ rapids_cuda_init_architectures(CUDF_JNI) project( CUDF_JNI - VERSION 22.02.00 + VERSION 22.04.00 LANGUAGES C CXX CUDA ) diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index db9758fdcdf..7f7499ee238 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -1758,7 +1758,7 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeORCBufferBegin( chunked_orc_writer_options opts = chunked_orc_writer_options::builder(sink) .metadata(&metadata) .compression(static_cast(j_compression)) - .enable_statistics(true) + .enable_statistics(ORC_STATISTICS_ROW_GROUP) .key_value_metadata(kv_metadata) .build(); auto writer_ptr = std::make_unique(opts); @@ -1801,7 +1801,7 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeORCFileBegin( chunked_orc_writer_options opts = chunked_orc_writer_options::builder(sink) .metadata(&metadata) .compression(static_cast(j_compression)) - .enable_statistics(true) + 
.enable_statistics(ORC_STATISTICS_ROW_GROUP) .key_value_metadata(kv_metadata) .build(); auto writer_ptr = std::make_unique(opts); diff --git a/java/src/test/java/ai/rapids/cudf/BinaryOpTest.java b/java/src/test/java/ai/rapids/cudf/BinaryOpTest.java index 0ca997d3c80..862f3860d3d 100644 --- a/java/src/test/java/ai/rapids/cudf/BinaryOpTest.java +++ b/java/src/test/java/ai/rapids/cudf/BinaryOpTest.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -1363,6 +1363,46 @@ public void testBitXor() { } } + @Test + public void testNullAnd() { + try (ColumnVector icv1 = ColumnVector.fromBoxedBooleans( + true, true, true, + false, false, false, + null, null, null); + ColumnVector icv2 = ColumnVector.fromBoxedBooleans( + true, false, null, + true, false, null, + true, false, null)) { + try (ColumnVector answer = icv1.binaryOp(BinaryOp.NULL_LOGICAL_AND, icv2, DType.BOOL8); + ColumnVector expected = ColumnVector.fromBoxedBooleans( + true, false, null, + false, false, false, + null, false, null)) { + assertColumnsAreEqual(expected, answer, "boolean NULL AND boolean"); + } + } + } + + @Test + public void testNullOr() { + try (ColumnVector icv1 = ColumnVector.fromBoxedBooleans( + true, true, true, + false, false, false, + null, null, null); + ColumnVector icv2 = ColumnVector.fromBoxedBooleans( + true, false, null, + true, false, null, + true, false, null)) { + try (ColumnVector answer = icv1.binaryOp(BinaryOp.NULL_LOGICAL_OR, icv2, DType.BOOL8); + ColumnVector expected = ColumnVector.fromBoxedBooleans( + true, true, true, + true, false, null, + true, null, null)) { + assertColumnsAreEqual(expected, answer, "boolean NULL OR boolean"); + } + } + } + @Test public void testAnd() { try (ColumnVector icv1 = ColumnVector.fromBoxedBooleans(BOOLEANS_1); diff --git 
a/python/cudf/cudf/_fuzz_testing/io.py b/python/cudf/cudf/_fuzz_testing/io.py index 1312300f714..193fb4c7f7f 100644 --- a/python/cudf/cudf/_fuzz_testing/io.py +++ b/python/cudf/cudf/_fuzz_testing/io.py @@ -25,6 +25,9 @@ def __init__( max_string_length=None, max_lists_length=None, max_lists_nesting_depth=None, + max_structs_nesting_depth=None, + max_struct_null_frequency=None, + max_struct_types_at_each_level=None, ): dirs = [] if dirs is None else dirs self._inputs = [] @@ -33,6 +36,9 @@ def __init__( self._max_string_length = max_string_length self._max_lists_length = max_lists_length self._max_lists_nesting_depth = max_lists_nesting_depth + self._max_structs_nesting_depth = max_structs_nesting_depth + self._max_struct_null_frequency = max_struct_null_frequency + self._max_struct_types_at_each_level = max_struct_types_at_each_level for i, path in enumerate(dirs): if i == 0 and not os.path.exists(path): diff --git a/python/cudf/cudf/_fuzz_testing/orc.py b/python/cudf/cudf/_fuzz_testing/orc.py index 2aa01eb3967..78e01fb76a4 100644 --- a/python/cudf/cudf/_fuzz_testing/orc.py +++ b/python/cudf/cudf/_fuzz_testing/orc.py @@ -83,7 +83,10 @@ def generate_input(self): self._df = df file_obj = io.BytesIO() pandas_to_orc( - df, file_io_obj=file_obj, stripe_size=self._rand(len(df)) + df, + file_io_obj=file_obj, + stripe_size=self._rand(len(df)), + arrow_table_schema=table.schema, ) file_obj.seek(0) buf = file_obj.read() diff --git a/python/cudf/cudf/_fuzz_testing/parquet.py b/python/cudf/cudf/_fuzz_testing/parquet.py index 5b00f96d88d..859d09b407f 100644 --- a/python/cudf/cudf/_fuzz_testing/parquet.py +++ b/python/cudf/cudf/_fuzz_testing/parquet.py @@ -59,6 +59,7 @@ def generate_input(self): - {"uint32"} | {"list", "decimal64"} ) + dtypes_meta, num_rows, num_cols = _generate_rand_meta( self, dtypes_list ) @@ -80,6 +81,7 @@ def generate_input(self): # https://issues.apache.org/jira/browse/ARROW-10123 # file = io.BytesIO() + df.to_parquet("temp_file") # file.seek(0) # 
self._current_buffer = copy.copy(file.read()) diff --git a/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_orc.py b/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_orc.py index b3fd7e8c5a7..977038d1fcb 100644 --- a/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_orc.py +++ b/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_orc.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. import io import sys @@ -74,7 +74,7 @@ def orc_reader_stripes_test(input_tuple, columns, stripes): data_handle=OrcWriter, params={ "compression": [None, "snappy"], - "enable_statistics": [True, False], + "enable_statistics": ["NONE", "STRIPE", "ROWGROUP"], }, ) def orc_writer_test(pdf, compression, enable_statistics): diff --git a/python/cudf/cudf/_fuzz_testing/utils.py b/python/cudf/cudf/_fuzz_testing/utils.py index ff5870c50be..87a8fc46374 100644 --- a/python/cudf/cudf/_fuzz_testing/utils.py +++ b/python/cudf/cudf/_fuzz_testing/utils.py @@ -6,6 +6,7 @@ import fastavro import numpy as np import pandas as pd +import pyarrow as pa import pyorc import cudf @@ -114,6 +115,26 @@ def _generate_rand_meta(obj, dtypes_list, null_frequency_override=None): meta["value_type"] = random.choice( list(cudf.utils.dtypes.ALL_TYPES - {"category"}) ) + elif dtype == "struct": + if obj._max_lists_nesting_depth is None: + meta["nesting_max_depth"] = np.random.randint(2, 10) + else: + meta["nesting_max_depth"] = obj._max_lists_nesting_depth + + if obj._max_struct_null_frequency is None: + meta["max_null_frequency"] = random.uniform(0, 1) + else: + meta["max_null_frequency"] = obj._max_struct_null_frequency + + if obj._max_struct_types_at_each_level is None: + meta["max_types_at_each_level"] = np.random.randint( + low=1, high=10 + ) + else: + meta[ + "max_types_at_each_level" + ] = obj._max_struct_types_at_each_level + elif dtype == "decimal64": meta["max_precision"] = cudf.Decimal64Dtype.MAX_PRECISION elif dtype == "decimal32": @@ -161,6 +182,8 @@ def 
pyarrow_to_pandas(table): df[column._name] = pd.Series( column, dtype=pyarrow_dtypes_to_pandas_dtypes[column.type] ) + elif isinstance(column.type, pa.StructType): + df[column._name] = column.to_pandas(integer_object_nulls=True) else: df[column._name] = column.to_pandas() @@ -196,6 +219,14 @@ def get_orc_dtype_info(dtype): ) +def get_arrow_dtype_info_for_pyorc(dtype): + if isinstance(dtype, pa.StructType): + return get_orc_schema(df=None, arrow_table_schema=dtype) + else: + pd_dtype = cudf.dtype(dtype.to_pandas_dtype()) + return get_orc_dtype_info(pd_dtype) + + def get_avro_schema(df): fields = [ {"name": col_name, "type": get_avro_dtype_info(col_dtype)} @@ -205,11 +236,17 @@ def get_avro_schema(df): return schema -def get_orc_schema(df): - ordered_dict = OrderedDict( - (col_name, get_orc_dtype_info(col_dtype)) - for col_name, col_dtype in df.dtypes.items() - ) +def get_orc_schema(df, arrow_table_schema=None): + if arrow_table_schema is None: + ordered_dict = OrderedDict( + (col_name, get_orc_dtype_info(col_dtype)) + for col_name, col_dtype in df.dtypes.items() + ) + else: + ordered_dict = OrderedDict( + (field.name, get_arrow_dtype_info_for_pyorc(field.type)) + for field in arrow_table_schema + ) schema = pyorc.Struct(**ordered_dict) return schema @@ -255,13 +292,25 @@ def pandas_to_avro(df, file_name=None, file_io_obj=None): fastavro.writer(file_io_obj, avro_schema, records) -def _preprocess_to_orc_tuple(df): +def _preprocess_to_orc_tuple(df, arrow_table_schema): def _null_to_None(value): if value is pd.NA or value is pd.NaT: return None else: return value + def sanitize(value, struct_type): + if value is None: + return None + + values_list = [] + for name, sub_type in struct_type.fields.items(): + if isinstance(sub_type, cudf.StructDtype): + values_list.append(sanitize(value[name], sub_type)) + else: + values_list.append(value[name]) + return tuple(values_list) + has_nulls_or_nullable_dtype = any( [ True @@ -271,19 +320,35 @@ def _null_to_None(value): for col in 
df.columns ] ) + pdf = df.copy(deep=True) + for field in arrow_table_schema: + if isinstance(field.type, pa.StructType): + pdf[field.name] = pdf[field.name].apply( + sanitize, args=(cudf.StructDtype.from_arrow(field.type),) + ) + else: + pdf[field.name] = pdf[field.name] tuple_list = [ tuple(map(_null_to_None, tup)) if has_nulls_or_nullable_dtype else tup - for tup in df.itertuples(index=False, name=None) + for tup in pdf.itertuples(index=False, name=None) ] - return tuple_list + return tuple_list, pdf, df -def pandas_to_orc(df, file_name=None, file_io_obj=None, stripe_size=67108864): - schema = get_orc_schema(df) +def pandas_to_orc( + df, + file_name=None, + file_io_obj=None, + stripe_size=67108864, + arrow_table_schema=None, +): + schema = get_orc_schema(df, arrow_table_schema=arrow_table_schema) - tuple_list = _preprocess_to_orc_tuple(df) + tuple_list, pdf, df = _preprocess_to_orc_tuple( + df, arrow_table_schema=arrow_table_schema + ) if file_name is not None: with open(file_name, "wb") as data: diff --git a/python/cudf/cudf/_lib/orc.pyx b/python/cudf/cudf/_lib/orc.pyx index cbba1796c26..ce4f183e795 100644 --- a/python/cudf/cudf/_lib/orc.pyx +++ b/python/cudf/cudf/_lib/orc.pyx @@ -9,6 +9,7 @@ from libcpp.string cimport string from libcpp.utility cimport move from libcpp.vector cimport vector +cimport cudf._lib.cpp.io.types as cudf_io_types from cudf._lib.column cimport Column from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.io.orc cimport ( @@ -144,10 +145,27 @@ cdef compression_type _get_comp_type(object compression): raise ValueError(f"Unsupported `compression` type {compression}") +cdef cudf_io_types.statistics_freq _get_orc_stat_freq(object statistics): + """ + Convert ORC statistics terms to CUDF convention: + - ORC "STRIPE" == CUDF "ROWGROUP" + - ORC "ROWGROUP" == CUDF "PAGE" + """ + statistics = str(statistics).upper() + if statistics == "NONE": + return cudf_io_types.statistics_freq.STATISTICS_NONE + elif statistics == "STRIPE": + 
return cudf_io_types.statistics_freq.STATISTICS_ROWGROUP + elif statistics == "ROWGROUP": + return cudf_io_types.statistics_freq.STATISTICS_PAGE + else: + raise ValueError(f"Unsupported `statistics_freq` type {statistics}") + + cpdef write_orc(table, object path_or_buf, object compression=None, - bool enable_statistics=True, + object statistics="ROWGROUP", object stripe_size_bytes=None, object stripe_size_rows=None, object row_index_stride=None): @@ -189,7 +207,7 @@ cpdef write_orc(table, sink_info_c, table_view_from_table(table, ignore_index=True) ).metadata(tbl_meta.get()) .compression(compression_) - .enable_statistics( (True if enable_statistics else False)) + .enable_statistics(_get_orc_stat_freq(statistics)) .build() ) if stripe_size_bytes is not None: @@ -268,15 +286,15 @@ cdef class ORCWriter: cdef unique_ptr[orc_chunked_writer] writer cdef sink_info sink cdef unique_ptr[data_sink] _data_sink - cdef bool enable_stats + cdef cudf_io_types.statistics_freq stat_freq cdef compression_type comp_type cdef object index cdef unique_ptr[table_input_metadata] tbl_meta def __cinit__(self, object path, object index=None, - object compression=None, bool enable_statistics=True): + object compression=None, object statistics="ROWGROUP"): self.sink = make_sink_info(path, self._data_sink) - self.enable_stats = enable_statistics + self.stat_freq = _get_orc_stat_freq(statistics) self.comp_type = _get_comp_type(compression) self.index = index self.initialized = False @@ -350,7 +368,7 @@ cdef class ORCWriter: .metadata(self.tbl_meta.get()) .key_value_metadata(move(user_data)) .compression(self.comp_type) - .enable_statistics(self.enable_stats) + .enable_statistics(self.stat_freq) .build() ) self.writer.reset(new orc_chunked_writer(args)) diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py index be5a1e7cc93..b1335c7c076 100644 --- a/python/cudf/cudf/core/_base_index.py +++ b/python/cudf/cudf/core/_base_index.py @@ -1444,7 +1444,8 @@ def 
drop_duplicates( """ # This utilizes the fact that all `Index` is also a `Frame`. - result = self.__class__._from_columns( + # Except RangeIndex. + return self._from_columns_like_self( drop_duplicates( list(self._columns), keys=range(len(self._data)), @@ -1453,8 +1454,6 @@ def drop_duplicates( ), self._column_names, ) - result._copy_type_metadata(self, include_index=False) - return result def dropna(self, how="any"): """ @@ -1476,12 +1475,10 @@ def dropna(self, how="any"): for col in self._columns ] - result = self.__class__._from_columns( + return self._from_columns_like_self( drop_nulls(data_columns, how=how, keys=range(len(data_columns)),), self._column_names, ) - result._copy_type_metadata(self, include_index=False) - return result def _gather(self, gather_map, nullify=False, check_bounds=True): """Gather rows of index specified by indices in `gather_map`. @@ -1501,14 +1498,11 @@ def _gather(self, gather_map, nullify=False, check_bounds=True): ): raise IndexError("Gather map index is out of bounds.") - result = self.__class__._from_columns( + return self._from_columns_like_self( gather(list(self._columns), gather_map, nullify=nullify), self._column_names, ) - result._copy_type_metadata(self, include_index=False) - return result - def take(self, indices, axis=0, allow_fill=True, fill_value=None): """Return a new index containing the rows specified by *indices* @@ -1561,12 +1555,10 @@ def _apply_boolean_mask(self, boolean_mask): if not is_bool_dtype(boolean_mask.dtype): raise ValueError("boolean_mask is not boolean type.") - result = self.__class__._from_columns( + return self._from_columns_like_self( apply_boolean_mask(list(self._columns), boolean_mask), column_names=self._column_names, ) - result._copy_type_metadata(self) - return result def _split_columns_by_levels(self, levels): if isinstance(levels, int) and levels > 0: diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 1d59d9f3b1a..69dc5389e7a 100644 --- 
a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -163,6 +163,22 @@ def _from_columns( return cls._from_data(data, index) + def _from_columns_like_self( + self, + columns: List[ColumnBase], + column_names: List[str], + index_names: Optional[List[str]] = None, + ): + """Construct a `Frame` from a list of columns with metadata from self. + + If `index_names` is set, the first `len(index_names)` columns are + used to construct the index of the frame. + """ + frame = self.__class__._from_columns( + columns, column_names, index_names + ) + return frame._copy_type_metadata(self, include_index=bool(index_names)) + def _mimic_inplace( self: T, result: Frame, inplace: bool = False ) -> Optional[Frame]: diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 6da98bf980d..a393d8e9457 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -184,11 +184,25 @@ def agg(self, func): Parameters ---------- func : str, callable, list or dict + Argument specifying the aggregation(s) to perform on the + groups. `func` can be any of the following: + + - string: the name of a supported aggregation + - callable: a function that accepts a Series/DataFrame and + performs a supported operation on it. + - list: a list of strings/callables specifying the + aggregations to perform on every column. + - dict: a mapping of column names to string/callable + specifying the aggregations to perform on those + columns. + + See :ref:`the user guide ` for supported + aggregations. Returns ------- A Series or DataFrame containing the combined results of the - aggregation. + aggregation(s). Examples -------- @@ -655,6 +669,54 @@ def rolling_avg(val, avg): kwargs.update({"chunks": offsets}) return grouped_values.apply_chunks(function, **kwargs) + def transform(self, function): + """Apply an aggregation, then broadcast the result to the group size. 
+ + Parameters + ---------- + function: str or callable + Aggregation to apply to each group. Note that the set of + operations currently supported by `transform` is identical + to that supported by the `agg` method. + + Returns + ------- + A Series or DataFrame of the same size as the input, with the + result of the aggregation per group broadcasted to the group + size. + + Examples + -------- + .. code-block:: python + + import cudf + df = cudf.DataFrame({'a': [2, 1, 1, 2, 2], 'b': [1, 2, 3, 4, 5]}) + df.groupby('a').transform('max') + b + 0 5 + 1 3 + 2 3 + 3 5 + 4 5 + + See also + -------- + cudf.core.groupby.GroupBy.agg + """ + try: + result = self.agg(function) + except TypeError as e: + raise NotImplementedError( + "Currently, `transform()` supports only aggregations." + ) from e + + if not result.index.equals(self.grouping.keys): + result = result._align_to_index( + self.grouping.keys, how="right", allow_non_unique=True + ) + result = result.reset_index(drop=True) + return result + def rolling(self, *args, **kwargs): """ Returns a `RollingGroupby` object that enables rolling window diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 9458057894a..e9f2de1cb1c 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -556,7 +556,7 @@ def _gather( ): raise IndexError("Gather map index is out of bounds.") - result = self.__class__._from_columns( + return self._from_columns_like_self( libcudf.copying.gather( list(self._index._columns + self._columns) if keep_index @@ -568,9 +568,6 @@ def _gather( self._index.names if keep_index else None, ) - result._copy_type_metadata(self, include_index=keep_index) - return result - def _positions_from_column_names( self, column_names, offset_by_index_columns=False ): @@ -628,7 +625,7 @@ def drop_duplicates( keys = self._positions_from_column_names( subset, offset_by_index_columns=not ignore_index ) - result = self.__class__._from_columns( + 
return self._from_columns_like_self( libcudf.stream_compaction.drop_duplicates( list(self._columns) if ignore_index @@ -640,8 +637,6 @@ def drop_duplicates( self._column_names, self._index.names if not ignore_index else None, ) - result._copy_type_metadata(self) - return result def add_prefix(self, prefix): """ @@ -1354,7 +1349,7 @@ def _drop_na_rows( for col in self._columns ] - result = self.__class__._from_columns( + return self._from_columns_like_self( libcudf.stream_compaction.drop_nulls( list(self._index._data.columns) + data_columns, how=how, @@ -1366,8 +1361,6 @@ def _drop_na_rows( self._column_names, self._index.names, ) - result._copy_type_metadata(self) - return result def _apply_boolean_mask(self, boolean_mask): """Apply boolean mask to each row of `self`. @@ -1378,15 +1371,13 @@ def _apply_boolean_mask(self, boolean_mask): if not is_bool_dtype(boolean_mask.dtype): raise ValueError("boolean_mask is not boolean type.") - result = self.__class__._from_columns( + return self._from_columns_like_self( libcudf.stream_compaction.apply_boolean_mask( list(self._index._columns + self._columns), boolean_mask ), column_names=self._column_names, index_names=self._index.names, ) - result._copy_type_metadata(self) - return result def take(self, indices, axis=0): """Return a new frame containing the rows specified by *indices*. 
diff --git a/python/cudf/cudf/io/csv.py b/python/cudf/cudf/io/csv.py index 01f1fdf9020..4694243ad18 100644 --- a/python/cudf/cudf/io/csv.py +++ b/python/cudf/cudf/io/csv.py @@ -59,17 +59,10 @@ def read_csv( "`read_csv` does not yet support reading multiple files" ) - # Only need to pass byte_ranges to get_filepath_or_buffer - # if `use_python_file_object=False` - byte_ranges = None - if not use_python_file_object and byte_range: - byte_ranges = [byte_range] - filepath_or_buffer, compression = ioutils.get_filepath_or_buffer( path_or_data=filepath_or_buffer, compression=compression, iotypes=(BytesIO, StringIO, NativeFile), - byte_ranges=byte_ranges, use_python_file_object=use_python_file_object, **kwargs, ) diff --git a/python/cudf/cudf/io/orc.py b/python/cudf/cudf/io/orc.py index c1cce3f996f..5c35d004ac0 100644 --- a/python/cudf/cudf/io/orc.py +++ b/python/cudf/cudf/io/orc.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2021, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. import datetime import warnings @@ -395,7 +395,7 @@ def to_orc( df, fname, compression=None, - enable_statistics=True, + statistics="ROWGROUP", stripe_size_bytes=None, stripe_size_rows=None, row_index_stride=None, @@ -431,7 +431,7 @@ def to_orc( df, file_obj, compression, - enable_statistics, + statistics, stripe_size_bytes, stripe_size_rows, row_index_stride, @@ -441,7 +441,7 @@ def to_orc( df, path_or_buf, compression, - enable_statistics, + statistics, stripe_size_bytes, stripe_size_rows, row_index_stride, diff --git a/python/cudf/cudf/io/parquet.py b/python/cudf/cudf/io/parquet.py index 3e73e0c9e3d..a919b00692d 100644 --- a/python/cudf/cudf/io/parquet.py +++ b/python/cudf/cudf/io/parquet.py @@ -1,14 +1,11 @@ # Copyright (c) 2019-2022, NVIDIA CORPORATION. 
-import io -import json import warnings from collections import defaultdict from contextlib import ExitStack from typing import Dict, List, Tuple from uuid import uuid4 -import fsspec import numpy as np from pyarrow import dataset as ds, parquet as pq @@ -310,103 +307,6 @@ def _process_dataset( ) -def _get_byte_ranges(file_list, row_groups, columns, fs, **kwargs): - - # This utility is used to collect the footer metadata - # from a parquet file. This metadata is used to define - # the exact byte-ranges that will be needed to read the - # target column-chunks from the file. - # - # This utility is only used for remote storage. - # - # The calculated byte-range information is used within - # cudf.io.ioutils.get_filepath_or_buffer (which uses - # _fsspec_data_transfer to convert non-local fsspec file - # objects into local byte buffers). - - if row_groups is None: - if columns is None: - return None, None, None # No reason to construct this - row_groups = [None for path in file_list] - - # Construct a list of required byte-ranges for every file - all_byte_ranges, all_footers, all_sizes = [], [], [] - for path, rgs in zip(file_list, row_groups): - - # Step 0 - Get size of file - if fs is None: - file_size = path.size - else: - file_size = fs.size(path) - - # Step 1 - Get 32 KB from tail of file. 
- # - # This "sample size" can be tunable, but should - # always be >= 8 bytes (so we can read the footer size) - tail_size = min(kwargs.get("footer_sample_size", 32_000), file_size,) - if fs is None: - path.seek(file_size - tail_size) - footer_sample = path.read(tail_size) - else: - footer_sample = fs.tail(path, tail_size) - - # Step 2 - Read the footer size and re-read a larger - # tail if necessary - footer_size = int.from_bytes(footer_sample[-8:-4], "little") - if tail_size < (footer_size + 8): - if fs is None: - path.seek(file_size - (footer_size + 8)) - footer_sample = path.read(footer_size + 8) - else: - footer_sample = fs.tail(path, footer_size + 8) - - # Step 3 - Collect required byte ranges - byte_ranges = [] - md = pq.ParquetFile(io.BytesIO(footer_sample)).metadata - column_set = None if columns is None else set(columns) - if column_set is not None: - schema = md.schema.to_arrow_schema() - has_pandas_metadata = ( - schema.metadata is not None and b"pandas" in schema.metadata - ) - if has_pandas_metadata: - md_index = [ - ind - for ind in json.loads( - schema.metadata[b"pandas"].decode("utf8") - ).get("index_columns", []) - # Ignore RangeIndex information - if not isinstance(ind, dict) - ] - column_set |= set(md_index) - for r in range(md.num_row_groups): - # Skip this row-group if we are targetting - # specific row-groups - if rgs is None or r in rgs: - row_group = md.row_group(r) - for c in range(row_group.num_columns): - column = row_group.column(c) - name = column.path_in_schema - # Skip this column if we are targetting a - # specific columns - split_name = name.split(".")[0] - if ( - column_set is None - or name in column_set - or split_name in column_set - ): - file_offset0 = column.dictionary_page_offset - if file_offset0 is None: - file_offset0 = column.data_page_offset - num_bytes = column.total_compressed_size - byte_ranges.append((file_offset0, num_bytes)) - - all_byte_ranges.append(byte_ranges) - all_footers.append(footer_sample) - 
all_sizes.append(file_size) - return all_byte_ranges, all_footers, all_sizes - - @ioutils.doc_read_parquet() def read_parquet( filepath_or_buffer, @@ -418,13 +318,24 @@ def read_parquet( num_rows=None, strings_to_categorical=False, use_pandas_metadata=True, - use_python_file_object=False, + use_python_file_object=True, categorical_partitions=True, + open_file_options=None, *args, **kwargs, ): """{docstring}""" + # Do not allow the user to set file-opening options + # when `use_python_file_object=False` is specified + if use_python_file_object is False: + if open_file_options: + raise ValueError( + "open_file_options is not currently supported when " + "use_python_file_object is set to False." + ) + open_file_options = {} + # Multiple sources are passed as a list. If a single source is passed, # wrap it in a list for unified processing downstream. if not is_list_like(filepath_or_buffer): @@ -470,38 +381,18 @@ def read_parquet( raise ValueError("cudf cannot apply filters to open file objects.") filepath_or_buffer = paths if paths else filepath_or_buffer - # Check if we should calculate the specific byte-ranges - # needed for each parquet file. We always do this when we - # have a file-system object to work with and it is not a - # local filesystem object. 
We can also do it without a - # file-system object for `AbstractBufferedFile` buffers - byte_ranges, footers, file_sizes = None, None, None - if not use_python_file_object: - need_byte_ranges = fs is not None and not ioutils._is_local_filesystem( - fs - ) - if need_byte_ranges or ( - filepath_or_buffer - and isinstance( - filepath_or_buffer[0], fsspec.spec.AbstractBufferedFile, - ) - ): - byte_ranges, footers, file_sizes = _get_byte_ranges( - filepath_or_buffer, row_groups, columns, fs, **kwargs - ) - filepaths_or_buffers = [] + if use_python_file_object: + open_file_options = _default_open_file_options( + open_file_options, columns, row_groups, fs=fs, + ) for i, source in enumerate(filepath_or_buffer): - tmp_source, compression = ioutils.get_filepath_or_buffer( path_or_data=source, compression=None, fs=fs, - byte_ranges=byte_ranges[i] if byte_ranges else None, - footer=footers[i] if footers else None, - file_size=file_sizes[i] if file_sizes else None, - add_par1_magic=True, use_python_file_object=use_python_file_object, + open_file_options=open_file_options, **kwargs, ) @@ -953,3 +844,41 @@ def __enter__(self): def __exit__(self, *args): self.close() + + +def _default_open_file_options( + open_file_options, columns, row_groups, fs=None +): + """ + Set default fields in open_file_options. + + Copies and updates `open_file_options` to + include column and row-group information + under the "precache_options" key. 
By default, + we set "method" to "parquet", but precaching + will be disabled if the user chooses `method=None` + + Parameters + ---------- + open_file_options : dict or None + columns : list + row_groups : list + fs : fsspec.AbstractFileSystem, Optional + """ + if fs and ioutils._is_local_filesystem(fs): + # Quick return for local fs + return open_file_options or {} + # Assume remote storage if `fs` was not specified + open_file_options = (open_file_options or {}).copy() + precache_options = open_file_options.pop("precache_options", {}).copy() + if precache_options.get("method", "parquet") == "parquet": + precache_options.update( + { + "method": "parquet", + "engine": precache_options.get("engine", "pyarrow"), + "columns": columns, + "row_groups": row_groups, + } + ) + open_file_options["precache_options"] = precache_options + return open_file_options diff --git a/python/cudf/cudf/testing/dataset_generator.py b/python/cudf/cudf/testing/dataset_generator.py index 13be158ed78..e1c7b42c7a3 100644 --- a/python/cudf/cudf/testing/dataset_generator.py +++ b/python/cudf/cudf/testing/dataset_generator.py @@ -133,7 +133,25 @@ def _generate_column(column_params, num_rows): else: arrow_type = None - if not isinstance(arrow_type, pa.lib.Decimal128Type): + if isinstance(column_params.dtype, cudf.StructDtype): + vals = pa.StructArray.from_arrays( + column_params.generator, + names=column_params.dtype.fields.keys(), + mask=pa.array( + np.random.choice( + [True, False], + size=num_rows, + p=[ + column_params.null_frequency, + 1 - column_params.null_frequency, + ], + ) + ) + if column_params.null_frequency > 0.0 + else None, + ) + return vals + elif not isinstance(arrow_type, pa.lib.Decimal128Type): vals = pa.array( column_params.generator, size=column_params.cardinality, @@ -352,6 +370,30 @@ def rand_dataframe( dtype=dtype, ) ) + elif dtype == "struct": + nesting_max_depth = meta["nesting_max_depth"] + max_types_at_each_level = meta["max_types_at_each_level"] + max_null_frequency 
= meta["max_null_frequency"] + nesting_depth = np.random.randint(1, nesting_max_depth) + structDtype = create_nested_struct_type( + max_types_at_each_level=max_types_at_each_level, + nesting_level=nesting_depth, + ) + + column_params.append( + ColumnParameters( + cardinality=cardinality, + null_frequency=null_frequency, + generator=struct_generator( + dtype=structDtype, + cardinality=cardinality, + size=rows, + max_null_frequency=max_null_frequency, + ), + is_sorted=False, + dtype=structDtype, + ) + ) elif dtype == "decimal64": max_precision = meta.get( "max_precision", cudf.Decimal64Dtype.MAX_PRECISION @@ -600,11 +642,15 @@ def decimal_generator(dtype, size): ) -def get_values_for_nested_data(dtype, lists_max_length): +def get_values_for_nested_data(dtype, lists_max_length=None, size=None): """ Returns list of values based on dtype. """ - cardinality = np.random.randint(0, lists_max_length) + if size is None: + cardinality = np.random.randint(0, lists_max_length) + else: + cardinality = size + dtype = cudf.dtype(dtype) if dtype.kind in ("i", "u"): values = int_generator(dtype=dtype, size=cardinality)() @@ -628,12 +674,7 @@ def get_values_for_nested_data(dtype, lists_max_length): else: raise TypeError(f"Unsupported dtype: {dtype}") - # To ensure numpy arrays are not passed as input to - # list constructor, returning a python list object here. - if isinstance(values, np.ndarray): - return values.tolist() - else: - return values + return values def make_lists(dtype, lists_max_length, nesting_depth, top_level_list): @@ -657,9 +698,40 @@ def make_lists(dtype, lists_max_length, nesting_depth, top_level_list): top_level_list = get_values_for_nested_data( dtype=dtype, lists_max_length=lists_max_length ) + # To ensure numpy arrays are not passed as input to + # list constructor, returning a python list object here. 
+ if isinstance(top_level_list, np.ndarray): + top_level_list = top_level_list.tolist() + return top_level_list +def make_array_for_struct(dtype, cardinality, size, max_null_frequency): + """ + Helper to create a pa.array with `size` and `dtype` + for a `StructArray`. + """ + + null_frequency = np.random.uniform(low=0, high=max_null_frequency) + local_cardinality = max(np.random.randint(low=0, high=cardinality), 1) + data = get_values_for_nested_data( + dtype=dtype.type.to_pandas_dtype(), size=local_cardinality + ) + vals = np.random.choice(data, size=size) + + return pa.array( + vals, + mask=np.random.choice( + [True, False], size=size, p=[null_frequency, 1 - null_frequency], + ) + if null_frequency > 0.0 + else None, + size=size, + safe=False, + type=dtype.type, + ) + + def get_nested_lists(dtype, size, nesting_depth, lists_max_length): """ Returns a list of nested lists with random nesting @@ -680,6 +752,34 @@ def get_nested_lists(dtype, size, nesting_depth, lists_max_length): return list_of_lists +def get_nested_structs(dtype, cardinality, size, max_null_frequency): + """ + Returns a list of arrays with random data + corresponding to the dtype provided. 
+ ``dtype`` here should be a ``cudf.StructDtype`` + """ + list_of_arrays = [] + + for name, col_dtype in dtype.fields.items(): + if isinstance(col_dtype, cudf.StructDtype): + result_arrays = get_nested_structs( + col_dtype, cardinality, size, max_null_frequency + ) + result_arrays = pa.StructArray.from_arrays( + result_arrays, names=col_dtype.fields.keys() + ) + else: + result_arrays = make_array_for_struct( + dtype=dtype._typ[name], + cardinality=cardinality, + size=size, + max_null_frequency=max_null_frequency, + ) + list_of_arrays.append(result_arrays) + + return list_of_arrays + + def list_generator(dtype, size, nesting_depth, lists_max_length): """ Generator for list data @@ -690,3 +790,29 @@ def list_generator(dtype, size, nesting_depth, lists_max_length): nesting_depth=nesting_depth, lists_max_length=lists_max_length, ) + + +def struct_generator(dtype, cardinality, size, max_null_frequency): + """ + Generator for struct data + """ + return lambda: get_nested_structs( + dtype=dtype, + cardinality=cardinality, + size=size, + max_null_frequency=max_null_frequency, + ) + + +def create_nested_struct_type(max_types_at_each_level, nesting_level): + dtypes_list = cudf.utils.dtypes.ALL_TYPES + picked_types = np.random.choice(list(dtypes_list), max_types_at_each_level) + type_dict = {} + for name, type_ in enumerate(picked_types): + if type_ == "struct": + type_dict[str(name)] = create_nested_struct_type( + max_types_at_each_level, nesting_level - 1 + ) + else: + type_dict[str(name)] = cudf.dtype(type_) + return cudf.StructDtype(type_dict) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 3e359335719..fa7680df8a5 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -5474,11 +5474,17 @@ def test_memory_usage_list(): @pytest.mark.parametrize("rows", [10, 100]) def test_memory_usage_multi(rows): deep = True + # We need to sample without replacement to guarantee that 
the size of the + # levels are always the same. df = pd.DataFrame( { "A": np.arange(rows, dtype="int32"), - "B": np.random.choice(np.arange(3, dtype="int64"), rows), - "C": np.random.choice(np.arange(3, dtype="float64"), rows), + "B": np.random.choice( + np.arange(rows, dtype="int64"), rows, replace=False + ), + "C": np.random.choice( + np.arange(rows, dtype="float64"), rows, replace=False + ), } ).set_index(["B", "C"]) gdf = cudf.from_pandas(df) @@ -5486,8 +5492,8 @@ def test_memory_usage_multi(rows): # of the underlying columns, levels, and codes expect = rows * 16 # Source Columns expect += rows * 16 # Codes - expect += 3 * 8 # Level 0 - expect += 3 * 8 # Level 1 + expect += rows * 8 # Level 0 + expect += rows * 8 # Level 1 assert expect == gdf.index.memory_usage(deep=deep) diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index c73e96de470..f5decd62ea9 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -2362,6 +2362,28 @@ def test_groupby_get_group(pdf, group, name, obj): assert_groupby_results_equal(expected, actual) +@pytest.mark.parametrize( + "by", + [ + "a", + ["a", "b"], + pd.Series([2, 1, 1, 2, 2]), + pd.Series(["b", "a", "a", "b", "b"]), + ], +) +@pytest.mark.parametrize("agg", ["sum", "mean", lambda df: df.mean()]) +def test_groupby_transform_aggregation(by, agg): + gdf = cudf.DataFrame( + {"a": [2, 2, 1, 2, 1], "b": [1, 1, 1, 2, 2], "c": [1, 2, 3, 4, 5]} + ) + pdf = gdf.to_pandas() + + expected = pdf.groupby(by).transform(agg) + actual = gdf.groupby(by).transform(agg) + + assert_groupby_results_equal(expected, actual) + + def test_groupby_select_then_ffill(): pdf = pd.DataFrame( { diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py index 44812f5aba4..8689f773a02 100644 --- a/python/cudf/cudf/tests/test_orc.py +++ b/python/cudf/cudf/tests/test_orc.py @@ -391,6 +391,64 @@ def test_orc_writer(datadir, tmpdir, reference_file, 
columns, compression): assert_eq(expect, got) +@pytest.mark.parametrize("stats_freq", ["NONE", "STRIPE", "ROWGROUP"]) +def test_orc_writer_statistics_frequency(datadir, tmpdir, stats_freq): + reference_file = "TestOrcFile.demo-12-zlib.orc" + pdf_fname = datadir / reference_file + gdf_fname = tmpdir.join("gdf.orc") + + try: + orcfile = pa.orc.ORCFile(pdf_fname) + except Exception as excpr: + if type(excpr).__name__ == "ArrowIOError": + pytest.skip(".orc file is not found") + else: + print(type(excpr).__name__) + + expect = orcfile.read().to_pandas() + cudf.from_pandas(expect).to_orc(gdf_fname.strpath, statistics=stats_freq) + got = pa.orc.ORCFile(gdf_fname).read().to_pandas() + + assert_eq(expect, got) + + +@pytest.mark.parametrize("stats_freq", ["NONE", "STRIPE", "ROWGROUP"]) +def test_chunked_orc_writer_statistics_frequency(datadir, tmpdir, stats_freq): + reference_file = "TestOrcFile.test1.orc" + pdf_fname = datadir / reference_file + gdf_fname = tmpdir.join("chunked_gdf.orc") + + try: + orcfile = pa.orc.ORCFile(pdf_fname) + except Exception as excpr: + if type(excpr).__name__ == "ArrowIOError": + pytest.skip(".orc file is not found") + else: + print(type(excpr).__name__) + + columns = [ + "boolean1", + "byte1", + "short1", + "int1", + "long1", + "float1", + "double1", + ] + pdf = orcfile.read(columns=columns).to_pandas() + gdf = cudf.from_pandas(pdf) + expect = pd.concat([pdf, pdf]).reset_index(drop=True) + + writer = ORCWriter(gdf_fname, statistics=stats_freq) + writer.write_table(gdf) + writer.write_table(gdf) + writer.close() + + got = pa.orc.ORCFile(gdf_fname).read().to_pandas() + + assert_eq(expect, got) + + @pytest.mark.parametrize("compression", [None, "snappy"]) @pytest.mark.parametrize( "reference_file, columns", @@ -592,8 +650,9 @@ def normalized_equals(value1, value2): return value1 == value2 +@pytest.mark.parametrize("stats_freq", ["STRIPE", "ROWGROUP"]) @pytest.mark.parametrize("nrows", [1, 100, 6000000]) -def test_orc_write_statistics(tmpdir, 
datadir, nrows): +def test_orc_write_statistics(tmpdir, datadir, nrows, stats_freq): supported_stat_types = supported_numpy_dtypes + ["str"] # Can't write random bool columns until issue #6763 is fixed if nrows == 6000000: @@ -609,7 +668,7 @@ def test_orc_write_statistics(tmpdir, datadir, nrows): fname = tmpdir.join("gdf.orc") # Write said dataframe to ORC with cuDF - gdf.to_orc(fname.strpath) + gdf.to_orc(fname.strpath, statistics=stats_freq) # Read back written ORC's statistics orc_file = pa.orc.ORCFile(fname) diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py index 519f24b7ca6..21556aad1eb 100644 --- a/python/cudf/cudf/tests/test_parquet.py +++ b/python/cudf/cudf/tests/test_parquet.py @@ -748,7 +748,10 @@ def test_parquet_reader_arrow_nativefile(parquet_path_or_buf): assert_eq(expect, got) -def test_parquet_reader_use_python_file_object(parquet_path_or_buf): +@pytest.mark.parametrize("use_python_file_object", [True, False]) +def test_parquet_reader_use_python_file_object( + parquet_path_or_buf, use_python_file_object +): # Check that the non-default `use_python_file_object=True` # option works as expected expect = cudf.read_parquet(parquet_path_or_buf("filepath")) @@ -756,11 +759,15 @@ def test_parquet_reader_use_python_file_object(parquet_path_or_buf): # Pass open fsspec file with fs.open(paths[0], mode="rb") as fil: - got1 = cudf.read_parquet(fil, use_python_file_object=True) + got1 = cudf.read_parquet( + fil, use_python_file_object=use_python_file_object + ) assert_eq(expect, got1) # Pass path only - got2 = cudf.read_parquet(paths[0], use_python_file_object=True) + got2 = cudf.read_parquet( + paths[0], use_python_file_object=use_python_file_object + ) assert_eq(expect, got2) diff --git a/python/cudf/cudf/tests/test_s3.py b/python/cudf/cudf/tests/test_s3.py index 5738e1f0d00..da1ffc1fc16 100644 --- a/python/cudf/cudf/tests/test_s3.py +++ b/python/cudf/cudf/tests/test_s3.py @@ -131,6 +131,9 @@ def 
pdf_ext(scope="module"): df["Integer"] = np.array([i for i in range(size)]) df["List"] = [[i] for i in range(size)] df["Struct"] = [{"a": i} for i in range(size)] + df["String"] = (["Alpha", "Beta", "Gamma", "Delta"] * (-(size // -4)))[ + :size + ] return df @@ -225,9 +228,16 @@ def test_write_csv(s3_base, s3so, pdf, chunksize): @pytest.mark.parametrize("bytes_per_thread", [32, 1024]) @pytest.mark.parametrize("columns", [None, ["Float", "String"]]) -@pytest.mark.parametrize("use_python_file_object", [False, True]) +@pytest.mark.parametrize("precache", [None, "parquet"]) +@pytest.mark.parametrize("use_python_file_object", [True, False]) def test_read_parquet( - s3_base, s3so, pdf, bytes_per_thread, columns, use_python_file_object + s3_base, + s3so, + pdf, + bytes_per_thread, + columns, + precache, + use_python_file_object, ): fname = "test_parquet_reader.parquet" bname = "parquet" @@ -239,10 +249,15 @@ def test_read_parquet( with s3_context(s3_base=s3_base, bucket=bname, files={fname: buffer}): got1 = cudf.read_parquet( "s3://{}/{}".format(bname, fname), - use_python_file_object=use_python_file_object, + open_file_options=( + {"precache_options": {"method": precache}} + if use_python_file_object + else None + ), storage_options=s3so, bytes_per_thread=bytes_per_thread, columns=columns, + use_python_file_object=use_python_file_object, ) expect = pdf[columns] if columns else pdf assert_eq(expect, got1) @@ -256,25 +271,18 @@ def test_read_parquet( with fs.open("s3://{}/{}".format(bname, fname), mode="rb") as f: got2 = cudf.read_parquet( f, - use_python_file_object=use_python_file_object, bytes_per_thread=bytes_per_thread, columns=columns, + use_python_file_object=use_python_file_object, ) assert_eq(expect, got2) @pytest.mark.parametrize("bytes_per_thread", [32, 1024]) @pytest.mark.parametrize("columns", [None, ["List", "Struct"]]) -@pytest.mark.parametrize("use_python_file_object", [False, True]) @pytest.mark.parametrize("index", [None, "Integer"]) def 
test_read_parquet_ext( - s3_base, - s3so, - pdf_ext, - bytes_per_thread, - columns, - use_python_file_object, - index, + s3_base, s3so, pdf_ext, bytes_per_thread, columns, index, ): fname = "test_parquet_reader_ext.parquet" bname = "parquet" @@ -290,7 +298,6 @@ def test_read_parquet_ext( with s3_context(s3_base=s3_base, bucket=bname, files={fname: buffer}): got1 = cudf.read_parquet( "s3://{}/{}".format(bname, fname), - use_python_file_object=use_python_file_object, storage_options=s3so, bytes_per_thread=bytes_per_thread, footer_sample_size=3200, @@ -326,12 +333,12 @@ def test_read_parquet_arrow_nativefile(s3_base, s3so, pdf, columns): assert_eq(expect, got) -@pytest.mark.parametrize("python_file", [True, False]) -def test_read_parquet_filters(s3_base, s3so, pdf, python_file): +@pytest.mark.parametrize("precache", [None, "parquet"]) +def test_read_parquet_filters(s3_base, s3so, pdf_ext, precache): fname = "test_parquet_reader_filters.parquet" bname = "parquet" buffer = BytesIO() - pdf.to_parquet(path=buffer) + pdf_ext.to_parquet(path=buffer) buffer.seek(0) filters = [("String", "==", "Omega")] with s3_context(s3_base=s3_base, bucket=bname, files={fname: buffer}): @@ -339,11 +346,11 @@ def test_read_parquet_filters(s3_base, s3so, pdf, python_file): "s3://{}/{}".format(bname, fname), storage_options=s3so, filters=filters, - use_python_file_object=python_file, + open_file_options={"precache_options": {"method": precache}}, ) # All row-groups should be filtered out - assert_eq(pdf.iloc[:0], got.reset_index(drop=True)) + assert_eq(pdf_ext.iloc[:0], got.reset_index(drop=True)) @pytest.mark.parametrize("partition_cols", [None, ["String"]]) diff --git a/python/cudf/cudf/utils/gpu_utils.py b/python/cudf/cudf/utils/gpu_utils.py index dbdd68f2df8..8947760e052 100644 --- a/python/cudf/cudf/utils/gpu_utils.py +++ b/python/cudf/cudf/utils/gpu_utils.py @@ -15,10 +15,10 @@ def validate_setup(): import warnings + from cuda.cudart import cudaDeviceAttr, cudaError_t + from 
rmm._cuda.gpu import ( CUDARuntimeError, - cudaDeviceAttr, - cudaError, deviceGetName, driverGetVersion, getDeviceAttribute, @@ -30,30 +30,30 @@ def _try_get_old_or_new_symbols(): try: # CUDA 10.2+ symbols return [ - cudaError.cudaErrorDeviceUninitialized, - cudaError.cudaErrorTimeout, + cudaError_t.cudaErrorDeviceUninitialized, + cudaError_t.cudaErrorTimeout, ] except AttributeError: # CUDA 10.1 symbols - return [cudaError.cudaErrorDeviceUninitilialized] + return [cudaError_t.cudaErrorDeviceUninitilialized] notify_caller_errors = { - cudaError.cudaErrorInitializationError, - cudaError.cudaErrorInsufficientDriver, - cudaError.cudaErrorInvalidDeviceFunction, - cudaError.cudaErrorInvalidDevice, - cudaError.cudaErrorStartupFailure, - cudaError.cudaErrorInvalidKernelImage, - cudaError.cudaErrorAlreadyAcquired, - cudaError.cudaErrorOperatingSystem, - cudaError.cudaErrorNotPermitted, - cudaError.cudaErrorNotSupported, - cudaError.cudaErrorSystemNotReady, - cudaError.cudaErrorSystemDriverMismatch, - cudaError.cudaErrorCompatNotSupportedOnDevice, + cudaError_t.cudaErrorInitializationError, + cudaError_t.cudaErrorInsufficientDriver, + cudaError_t.cudaErrorInvalidDeviceFunction, + cudaError_t.cudaErrorInvalidDevice, + cudaError_t.cudaErrorStartupFailure, + cudaError_t.cudaErrorInvalidKernelImage, + cudaError_t.cudaErrorAlreadyAcquired, + cudaError_t.cudaErrorOperatingSystem, + cudaError_t.cudaErrorNotPermitted, + cudaError_t.cudaErrorNotSupported, + cudaError_t.cudaErrorSystemNotReady, + cudaError_t.cudaErrorSystemDriverMismatch, + cudaError_t.cudaErrorCompatNotSupportedOnDevice, *_try_get_old_or_new_symbols(), - cudaError.cudaErrorUnknown, - cudaError.cudaErrorApiFailureBase, + cudaError_t.cudaErrorUnknown, + cudaError_t.cudaErrorApiFailureBase, } try: @@ -68,8 +68,6 @@ def _try_get_old_or_new_symbols(): # Cupy throws RunTimeException to get GPU count, # hence obtaining GPU count by in-house cpp api above - # 75 - Indicates to get "cudaDevAttrComputeCapabilityMajor" 
attribute - # 0 - Get GPU 0 major_version = getDeviceAttribute( cudaDeviceAttr.cudaDevAttrComputeCapabilityMajor, 0 ) diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py index 6f958860dad..8f8a40ae4ab 100644 --- a/python/cudf/cudf/utils/ioutils.py +++ b/python/cudf/cudf/utils/ioutils.py @@ -3,6 +3,7 @@ import datetime import os import urllib +import warnings from io import BufferedWriter, BytesIO, IOBase, TextIOWrapper from threading import Thread @@ -17,6 +18,13 @@ from cudf.utils.docutils import docfmt_partial +try: + import fsspec.parquet as fsspec_parquet + +except ImportError: + fsspec_parquet = None + + _docstring_remote_sources = """ - cuDF supports local and remote data stores. See configuration details for available sources @@ -160,10 +168,17 @@ use_pandas_metadata : boolean, default True If True and dataset has custom PANDAS schema metadata, ensure that index columns are also loaded. -use_python_file_object : boolean, default False +use_python_file_object : boolean, default True If True, Arrow-backed PythonFile objects will be used in place of fsspec - AbstractBufferedFile objects at IO time. This option is likely to improve - performance when making small reads from larger parquet files. + AbstractBufferedFile objects at IO time. Setting this argument to `False` + will require the entire file to be copied to host memory, and is highly + discouraged. +open_file_options : dict, optional + Dictionary of key-value pairs to pass to the function used to open remote + files. By default, this will be `fsspec.parquet.open_parquet_file`. To + deactivate optimized precaching, set the "method" to `None` under the + "precache_options" key. Note that the `open_file_func` key can also be + used to specify a custom file-open function. 
Returns ------- @@ -1220,6 +1235,100 @@ def _get_filesystem_and_paths(path_or_data, **kwargs): return fs, return_paths +def _set_context(obj, stack): + # Helper function to place open file on context stack + if stack is None: + return obj + return stack.enter_context(obj) + + +def _open_remote_files( + paths, + fs, + context_stack=None, + open_file_func=None, + precache_options=None, + **kwargs, +): + """Return a list of open file-like objects given + a list of remote file paths. + + Parameters + ---------- + paths : list(str) + List of file-path strings. + fs : fsspec.AbstractFileSystem + Fsspec file-system object. + context_stack : contextlib.ExitStack, Optional + Context manager to use for open files. + open_file_func : Callable, Optional + Call-back function to use for opening. If this argument + is specified, all other arguments will be ignored. + precache_options : dict, optional + Dictionary of key-word arguments to pass to use for + precaching. Unless the input contains ``{"method": None}``, + ``fsspec.parquet.open_parquet_file`` will be used for remote + storage. + **kwargs : + Key-word arguments to be passed to format-specific + open functions. + """ + + # Just use call-back function if one was specified + if open_file_func is not None: + return [ + _set_context(open_file_func(path, **kwargs), context_stack) + for path in paths + ] + + # Check if the "precache" option is supported. + # In the future, fsspec should do this check for us + precache_options = (precache_options or {}).copy() + precache = precache_options.pop("method", None) + if precache not in ("parquet", None): + raise ValueError(f"{precache} not a supported `precache` option.") + + # Check that "parts" caching (used for all format-aware file handling) + # is supported by the installed fsspec/s3fs version + if precache == "parquet" and not fsspec_parquet: + warnings.warn( + f"This version of fsspec ({fsspec.__version__}) does " + f"not support parquet-optimized precaching. 
Please upgrade " + f"to the latest fsspec version for better performance." + ) + precache = None + + if precache == "parquet": + # Use fsspec.parquet module. + # TODO: Use `cat_ranges` to collect "known" + # parts for all files at once. + row_groups = precache_options.pop("row_groups", None) or ( + [None] * len(paths) + ) + return [ + ArrowPythonFile( + _set_context( + fsspec_parquet.open_parquet_file( + path, + fs=fs, + row_groups=rgs, + **precache_options, + **kwargs, + ), + context_stack, + ) + ) + for path, rgs in zip(paths, row_groups) + ] + + # Default open - Use pyarrow filesystem API + pa_fs = PyFileSystem(FSSpecHandler(fs)) + return [ + _set_context(pa_fs.open_input_file(fpath), context_stack) + for fpath in paths + ] + + def get_filepath_or_buffer( path_or_data, compression, @@ -1228,6 +1337,7 @@ def get_filepath_or_buffer( iotypes=(BytesIO, NativeFile), byte_ranges=None, use_python_file_object=False, + open_file_options=None, **kwargs, ): """Return either a filepath string to data, or a memory buffer of data. @@ -1249,6 +1359,9 @@ def get_filepath_or_buffer( use_python_file_object : boolean, default False If True, Arrow-backed PythonFile objects will be used in place of fsspec AbstractBufferedFile objects. + open_file_options : dict, optional + Optional dictionary of key-word arguments to pass to + `_open_remote_files` (used for remote storage only). 
Returns ------- @@ -1282,19 +1395,14 @@ def get_filepath_or_buffer( else: if use_python_file_object: - pa_fs = PyFileSystem(FSSpecHandler(fs)) - path_or_data = [ - pa_fs.open_input_file(fpath) for fpath in paths - ] + path_or_data = _open_remote_files( + paths, fs, **(open_file_options or {}), + ) else: path_or_data = [ BytesIO( _fsspec_data_transfer( - fpath, - fs=fs, - mode=mode, - byte_ranges=byte_ranges, - **kwargs, + fpath, fs=fs, mode=mode, **kwargs, ) ) for fpath in paths @@ -1309,9 +1417,7 @@ def get_filepath_or_buffer( path_or_data = ArrowPythonFile(path_or_data) else: path_or_data = BytesIO( - _fsspec_data_transfer( - path_or_data, mode=mode, byte_ranges=byte_ranges, **kwargs - ) + _fsspec_data_transfer(path_or_data, mode=mode, **kwargs) ) return path_or_data, compression @@ -1545,10 +1651,7 @@ def _ensure_filesystem(passed_filesystem, path, **kwargs): def _fsspec_data_transfer( path_or_fob, fs=None, - byte_ranges=None, - footer=None, file_size=None, - add_par1_magic=None, bytes_per_thread=256_000_000, max_gap=64_000, mode="rb", @@ -1568,48 +1671,22 @@ def _fsspec_data_transfer( file_size = file_size or fs.size(path_or_fob) # Check if a direct read makes the most sense - if not byte_ranges and bytes_per_thread >= file_size: + if bytes_per_thread >= file_size: if file_like: return path_or_fob.read() else: - return fs.open(path_or_fob, mode=mode, cache_type="none").read() + return fs.open(path_or_fob, mode=mode, cache_type="all").read() # Threaded read into "local" buffer buf = np.zeros(file_size, dtype="b") - if byte_ranges: - - # Optimize/merge the ranges - byte_ranges = _merge_ranges( - byte_ranges, max_block=bytes_per_thread, max_gap=max_gap, - ) - - # Call multi-threaded data transfer of - # remote byte-ranges to local buffer - _read_byte_ranges( - path_or_fob, byte_ranges, buf, fs=fs, **kwargs, - ) - - # Add Header & Footer bytes - if footer is not None: - footer_size = len(footer) - buf[-footer_size:] = np.frombuffer( - footer[-footer_size:], 
dtype="b" - ) - # Add parquet magic bytes (optional) - if add_par1_magic: - buf[:4] = np.frombuffer(b"PAR1", dtype="b") - if footer is None: - buf[-4:] = np.frombuffer(b"PAR1", dtype="b") - - else: - byte_ranges = [ - (b, min(bytes_per_thread, file_size - b)) - for b in range(0, file_size, bytes_per_thread) - ] - _read_byte_ranges( - path_or_fob, byte_ranges, buf, fs=fs, **kwargs, - ) + byte_ranges = [ + (b, min(bytes_per_thread, file_size - b)) + for b in range(0, file_size, bytes_per_thread) + ] + _read_byte_ranges( + path_or_fob, byte_ranges, buf, fs=fs, **kwargs, + ) return buf.tobytes() diff --git a/python/cudf_kafka/cudf_kafka/_lib/.kafka.pxd.swo b/python/cudf_kafka/cudf_kafka/_lib/.kafka.pxd.swo deleted file mode 100644 index 624b60798ae..00000000000 Binary files a/python/cudf_kafka/cudf_kafka/_lib/.kafka.pxd.swo and /dev/null differ diff --git a/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx b/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx index 24d072c544e..bff60e63fdb 100644 --- a/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx +++ b/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx @@ -14,7 +14,12 @@ from cudf_kafka._lib.kafka cimport kafka_consumer # To avoid including in libcudf_kafka # we introduce this wrapper in Cython cdef map[string, string] oauth_callback_wrapper(void *ctx): - return ((ctx))() + resp = ((ctx))() + cdef map[string, string] c_resp + c_resp[str.encode("token")] = str.encode(resp["token"]) + c_resp[str.encode("token_expiration_in_epoch")] \ + = str(resp["token_expiration_in_epoch"]).encode() + return c_resp cdef class KafkaDatasource(Datasource): diff --git a/python/custreamz/dev_requirements.txt b/python/custreamz/dev_requirements.txt index 6f1c09947d5..b4cd7a649ee 100644 --- a/python/custreamz/dev_requirements.txt +++ b/python/custreamz/dev_requirements.txt @@ -3,8 +3,8 @@ flake8==3.8.3 black==19.10b0 isort==5.6.4 -dask>=2021.11.1,<=2021.11.2 -distributed>=2021.11.1,<=2021.11.2 +dask>=2021.11.1,<=2022.01.0 +distributed>=2021.11.1,<=2022.01.0 
streamz python-confluent-kafka pytest diff --git a/python/dask_cudf/dask_cudf/io/parquet.py b/python/dask_cudf/dask_cudf/io/parquet.py index a49d73493ec..ac5795fa2ec 100644 --- a/python/dask_cudf/dask_cudf/io/parquet.py +++ b/python/dask_cudf/dask_cudf/io/parquet.py @@ -20,7 +20,9 @@ import cudf from cudf.core.column import as_column, build_categorical_column from cudf.io import write_to_dataset +from cudf.io.parquet import _default_open_file_options from cudf.utils.dtypes import cudf_dtype_from_pa_type +from cudf.utils.ioutils import _is_local_filesystem, _open_remote_files class CudfEngine(ArrowDatasetEngine): @@ -64,6 +66,7 @@ def _read_paths( partitions=None, partitioning=None, partition_keys=None, + open_file_options=None, **kwargs, ): @@ -75,15 +78,15 @@ def _read_paths( # Non-local filesystem handling paths_or_fobs = paths - if not cudf.utils.ioutils._is_local_filesystem(fs): - - # Convert paths to file objects for remote data - paths_or_fobs = [ - stack.enter_context( - fs.open(path, mode="rb", cache_type="none") - ) - for path in paths - ] + if not _is_local_filesystem(fs): + paths_or_fobs = _open_remote_files( + paths_or_fobs, + fs, + context_stack=stack, + **_default_open_file_options( + open_file_options, columns, row_groups + ), + ) # Use cudf to read in data df = cudf.read_parquet( @@ -150,6 +153,7 @@ def read_partition( partitions=(), partitioning=None, schema=None, + open_file_options=None, **kwargs, ): @@ -168,7 +172,10 @@ def read_partition( if not isinstance(pieces, list): pieces = [pieces] + # Extract supported kwargs from `kwargs` strings_to_cats = kwargs.get("strings_to_categorical", False) + read_kwargs = kwargs.get("read", {}) + read_kwargs.update(open_file_options or {}) # Assume multi-piece read paths = [] @@ -192,7 +199,7 @@ def read_partition( partitions=partitions, partitioning=partitioning, partition_keys=last_partition_keys, - **kwargs.get("read", {}), + **read_kwargs, ) ) paths = rgs = [] @@ -215,13 +222,13 @@ def read_partition( 
partitions=partitions, partitioning=partitioning, partition_keys=last_partition_keys, - **kwargs.get("read", {}), + **read_kwargs, ) ) df = cudf.concat(dfs) if len(dfs) > 1 else dfs[0] # Re-set "object" dtypes align with pa schema - set_object_dtypes_from_pa_schema(df, kwargs.get("schema", None)) + set_object_dtypes_from_pa_schema(df, schema) if index and (index[0] in df.columns): df = df.set_index(index[0]) diff --git a/python/dask_cudf/dask_cudf/io/tests/test_s3.py b/python/dask_cudf/dask_cudf/io/tests/test_s3.py index ad53f5cfe0f..83ff1273b36 100644 --- a/python/dask_cudf/dask_cudf/io/tests/test_s3.py +++ b/python/dask_cudf/dask_cudf/io/tests/test_s3.py @@ -6,6 +6,7 @@ from io import BytesIO import pandas as pd +import pyarrow.fs as pa_fs import pytest import dask_cudf @@ -115,7 +116,15 @@ def test_read_csv(s3_base, s3so): assert df.a.sum().compute() == 4 -def test_read_parquet(s3_base, s3so): +@pytest.mark.parametrize( + "open_file_options", + [ + {"precache_options": {"method": None}}, + {"precache_options": {"method": "parquet"}}, + {"open_file_func": None}, + ], +) +def test_read_parquet(s3_base, s3so, open_file_options): pdf = pd.DataFrame({"a": [1, 2, 3, 4], "b": [2.1, 2.2, 2.3, 2.4]}) buffer = BytesIO() pdf.to_parquet(path=buffer) @@ -123,8 +132,15 @@ def test_read_parquet(s3_base, s3so): with s3_context( s3_base=s3_base, bucket="daskparquet", files={"file.parq": buffer} ): + if "open_file_func" in open_file_options: + fs = pa_fs.S3FileSystem( + endpoint_override=s3so["client_kwargs"]["endpoint_url"], + ) + open_file_options["open_file_func"] = fs.open_input_file df = dask_cudf.read_parquet( - "s3://daskparquet/*.parq", storage_options=s3so + "s3://daskparquet/*.parq", + storage_options=s3so, + open_file_options=open_file_options, ) assert df.a.sum().compute() == 10 assert df.b.sum().compute() == 9 diff --git a/python/dask_cudf/dev_requirements.txt b/python/dask_cudf/dev_requirements.txt index d8b0745be79..d5ba79d4987 100644 --- 
a/python/dask_cudf/dev_requirements.txt +++ b/python/dask_cudf/dev_requirements.txt @@ -1,7 +1,7 @@ # Copyright (c) 2021, NVIDIA CORPORATION. -dask>=2021.11.1 -distributed>=2021.11.1 +dask>=2021.11.1,<=2022.01.0 +distributed>=2021.11.1,<=2022.01.0 fsspec>=0.6.0 numba>=0.53.1 numpy diff --git a/python/dask_cudf/setup.py b/python/dask_cudf/setup.py index 425839772eb..39491a45e7e 100644 --- a/python/dask_cudf/setup.py +++ b/python/dask_cudf/setup.py @@ -10,8 +10,8 @@ install_requires = [ "cudf", - "dask>=2021.11.1", - "distributed>=2021.11.1", + "dask>=2021.11.1,<=2022.01.0", + "distributed>=2021.11.1,<=2022.01.0", "fsspec>=0.6.0", "numpy", "pandas>=1.0,<1.4.0dev0",