diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index 6c4175a2539..75bfe6c34bc 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -52,7 +52,7 @@ test: - test -f $PREFIX/lib/libcudftestutil.a - test -f $PREFIX/include/cudf/aggregation.hpp - test -f $PREFIX/include/cudf/ast/transform.hpp - - test -f $PREFIX/include/cudf/ast/detail/linearizer.hpp + - test -f $PREFIX/include/cudf/ast/detail/expression_parser.hpp - test -f $PREFIX/include/cudf/ast/detail/operators.hpp - test -f $PREFIX/include/cudf/ast/nodes.hpp - test -f $PREFIX/include/cudf/ast/operators.hpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 5c05a58b448..90c17067b55 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -151,7 +151,7 @@ add_library(cudf src/aggregation/aggregation.cpp src/aggregation/aggregation.cu src/aggregation/result_cache.cpp - src/ast/linearizer.cpp + src/ast/expression_parser.cpp src/ast/transform.cu src/binaryop/binaryop.cpp src/binaryop/compiled/binary_ops.cu diff --git a/cpp/include/cudf/ast/detail/expression_parser.hpp b/cpp/include/cudf/ast/detail/expression_parser.hpp new file mode 100644 index 00000000000..db8845825c5 --- /dev/null +++ b/cpp/include/cudf/ast/detail/expression_parser.hpp @@ -0,0 +1,339 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include +#include + +#include + +#include + +namespace cudf { +namespace ast { +namespace detail { + +/** + * @brief Enum defining data reference types used by a node. + * + * This enum is device-specific. For instance, intermediate data references are generated by the + * linearization process but cannot be explicitly created by the user. + */ +enum class device_data_reference_type { + COLUMN, // A value in a table column + LITERAL, // A literal value + INTERMEDIATE // An internal temporary value +}; + +/** + * @brief A device data reference describes a source of data used by a node. + * + * This is a POD class used to create references describing data type and locations for consumption + * by the `row_evaluator`. 
+ */ +struct alignas(8) device_data_reference { + device_data_reference(device_data_reference_type reference_type, + cudf::data_type data_type, + cudf::size_type data_index, + table_reference table_source); + + device_data_reference(device_data_reference_type reference_type, + cudf::data_type data_type, + cudf::size_type data_index); + + device_data_reference_type const reference_type; // Source of data + cudf::data_type const data_type; // Type of data + cudf::size_type const data_index; // The column index of a table, index of a + // literal, or index of an intermediate + table_reference const table_source; + + bool operator==(device_data_reference const& rhs) const + { + return std::tie(data_index, reference_type, table_source) == + std::tie(rhs.data_index, rhs.reference_type, rhs.table_source); + } +}; + +// Type trait for wrapping nullable types in a thrust::optional. Non-nullable +// types are returned as is. +template +struct possibly_null_value; + +template +struct possibly_null_value { + using type = thrust::optional; +}; + +template +struct possibly_null_value { + using type = T; +}; + +template +using possibly_null_value_t = typename possibly_null_value::type; + +// Type used for intermediate storage in expression evaluation. +template +using IntermediateDataType = possibly_null_value_t; + +/** + * @brief A container of all device data required to evaluate an expression on tables. + * + * This struct should never be instantiated directly. It is created by the + * `expression_parser` on construction, and the resulting member is publicly accessible + * for passing to kernels for constructing an `expression_evaluator`. + * + */ +struct expression_device_view { + device_span data_references; + device_span literals; + device_span operators; + device_span operator_source_indices; + cudf::size_type num_intermediates; + int shmem_per_thread; +}; + +/** + * @brief The expression_parser traverses an expression and converts it into a form suitable for + * execution on the device. + * + * This class is part of a "visitor" pattern with the `node` class. + * + * This class does pre-processing work on the host, validating operators and operand data types. It + * traverses downward from a root node in a depth-first fashion, capturing information about + * the nodes and constructing vectors of information that are later used by the device for + * evaluating the abstract syntax tree as a "linear" list of operators whose input dependencies are + * resolved into intermediate data storage in shared memory. + */ +class expression_parser { + public: + /** + * @brief Construct a new expression_parser object + * + * @param expr The expression to create an evaluable expression_parser for. + * @param left The left table used for evaluating the abstract syntax tree. + * @param right The right table used for evaluating the abstract syntax tree. + */ + expression_parser(node const& expr, + cudf::table_view left, + cudf::table_view right, + bool has_nulls, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + : _left{left}, _right{right}, _node_count{0}, _intermediate_counter{}, _has_nulls(has_nulls) + { + expr.accept(*this); + move_to_device(stream, mr); + } + + /** + * @brief Construct a new expression_parser object + * + * @param expr The expression to create an evaluable expression_parser for. + * @param table The table used for evaluating the abstract syntax tree. 
+ */ + expression_parser(node const& expr, + cudf::table_view table, + bool has_nulls, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + : expression_parser(expr, table, table, has_nulls, stream, mr) + { + } + + /** + * @brief Get the root data type of the abstract syntax tree. + * + * @return cudf::data_type + */ + cudf::data_type output_type() const; + + /** + * @brief Visit a literal node. + * + * @param expr Literal node. + * @return cudf::size_type Index of device data reference for the node. + */ + cudf::size_type visit(literal const& expr); + + /** + * @brief Visit a column reference node. + * + * @param expr Column reference node. + * @return cudf::size_type Index of device data reference for the node. + */ + cudf::size_type visit(column_reference const& expr); + + /** + * @brief Visit an expression node. + * + * @param expr Expression node. + * @return cudf::size_type Index of device data reference for the node. + */ + cudf::size_type visit(expression const& expr); + + /** + * @brief Internal class used to track the utilization of intermediate storage locations. + * + * As nodes are being evaluated, they may generate "intermediate" data that is immediately + * consumed. Rather than manifesting this data in global memory, we can store intermediates of any + * fixed width type (up to 8 bytes) by placing them in shared memory. This class helps to track + * the number and indices of intermediate data in shared memory using a give-take model. Locations + * in shared memory can be "taken" and used for storage, "given back," and then later re-used. + * This aims to minimize the maximum amount of shared memory needed at any point during the + * evaluation. + * + */ + class intermediate_counter { + public: + intermediate_counter() : used_values(), max_used(0) {} + cudf::size_type take(); + void give(cudf::size_type value); + cudf::size_type get_max_used() const { return max_used; } + + private: + /** + * @brief Find the first missing value in a contiguous sequence of integers. + * + * From a sorted container of integers, find the first "missing" value. + * For example, {0, 1, 2, 4, 5} is missing 3, and {1, 2, 3} is missing 0. + * If there are no missing values, return the size of the container. + * + * @return cudf::size_type Smallest value not already in the container. + */ + cudf::size_type find_first_missing() const; + + std::vector used_values; + cudf::size_type max_used; + }; + + expression_device_view device_expression_data; ///< The collection of data required to evaluate + ///< the expression on the device. + + private: + /** + * @brief Helper function for adding components (operators, literals, etc) to AST plan + * + * @tparam T The underlying type of the input `std::vector` + * @param[in] v The `std::vector` containing components (operators, literals, etc). + * @param[in,out] sizes The `std::vector` containing the size of each data buffer. + * @param[in,out] data_pointers The `std::vector` containing pointers to each data buffer. 
+ */ + template + void extract_size_and_pointer(std::vector const& v, + std::vector& sizes, + std::vector& data_pointers) + { + auto const data_size = sizeof(T) * v.size(); + sizes.push_back(data_size); + data_pointers.push_back(v.data()); + } + + void move_to_device(rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) + { + std::vector sizes; + std::vector data_pointers; + + extract_size_and_pointer(_data_references, sizes, data_pointers); + extract_size_and_pointer(_literals, sizes, data_pointers); + extract_size_and_pointer(_operators, sizes, data_pointers); + extract_size_and_pointer(_operator_source_indices, sizes, data_pointers); + + // Create device buffer + auto const buffer_size = std::accumulate(sizes.cbegin(), sizes.cend(), 0); + auto buffer_offsets = std::vector(sizes.size()); + thrust::exclusive_scan(sizes.cbegin(), sizes.cend(), buffer_offsets.begin(), 0); + + auto h_data_buffer = std::vector(buffer_size); + for (unsigned int i = 0; i < data_pointers.size(); ++i) { + std::memcpy(h_data_buffer.data() + buffer_offsets[i], data_pointers[i], sizes[i]); + } + + _device_data_buffer = rmm::device_buffer(h_data_buffer.data(), buffer_size, stream, mr); + + stream.synchronize(); + + // Create device pointers to components of plan + auto device_data_buffer_ptr = static_cast(_device_data_buffer.data()); + device_expression_data.data_references = device_span( + reinterpret_cast(device_data_buffer_ptr + + buffer_offsets[0]), + _data_references.size()); + device_expression_data.literals = + device_span( + reinterpret_cast( + device_data_buffer_ptr + buffer_offsets[1]), + _literals.size()); + device_expression_data.operators = device_span( + reinterpret_cast(device_data_buffer_ptr + buffer_offsets[2]), + _operators.size()); + device_expression_data.operator_source_indices = device_span( + reinterpret_cast(device_data_buffer_ptr + buffer_offsets[3]), + _operator_source_indices.size()); + device_expression_data.num_intermediates = _intermediate_counter.get_max_used(); + device_expression_data.shmem_per_thread = static_cast( + (_has_nulls ? sizeof(IntermediateDataType) : sizeof(IntermediateDataType)) * + device_expression_data.num_intermediates); + } + + /** + * @brief Helper function for recursive traversal of expressions. + * + * When parsing an expression composed of subexpressions, all subexpressions + * must be evaluated before an operator can be applied to them. This method + * performs that recursive traversal (in conjunction with the + * `expression_parser.visit` and `expression.accept` methods if necessary to + * descend deeper into an expression tree). + * + * @param operands The operands to visit. + * + * @return The indices of the operands stored in the data references. + */ + std::vector visit_operands( + std::vector> operands); + + /** + * @brief Add a data reference to the internal list. + * + * @param data_ref The data reference to add. + * + * @return The index of the added data reference in the internal data references list. + */ + cudf::size_type add_data_reference(detail::device_data_reference data_ref); + + rmm::device_buffer + _device_data_buffer; ///< The device-side data buffer containing the plan information, which is + ///< owned by this class and persists until it is destroyed. 
+ + cudf::table_view const& _left; + cudf::table_view const& _right; + cudf::size_type _node_count; + intermediate_counter _intermediate_counter; + bool _has_nulls; + std::vector _data_references; + std::vector _operators; + std::vector _operator_source_indices; + std::vector _literals; +}; + +} // namespace detail + +} // namespace ast + +} // namespace cudf diff --git a/cpp/include/cudf/ast/detail/linearizer.hpp b/cpp/include/cudf/ast/detail/linearizer.hpp deleted file mode 100644 index 59eda1df7b7..00000000000 --- a/cpp/include/cudf/ast/detail/linearizer.hpp +++ /dev/null @@ -1,246 +0,0 @@ -/* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include -#include -#include -#include - -namespace cudf { -namespace ast { - -// Forward declaration -enum class table_reference; -class literal; -class column_reference; -class expression; - -namespace detail { - -/** - * @brief Enum defining data reference types used by a node. - * - * This enum is device-specific. For instance, intermediate data references are generated by the - * linearization process but cannot be explicitly created by the user. - */ -enum class device_data_reference_type { - COLUMN, // A value in a table column - LITERAL, // A literal value - INTERMEDIATE // An internal temporary value -}; - -/** - * @brief A device data reference describes a source of data used by a node. - * - * This is a POD class used to create references describing data type and locations for consumption - * by the `row_evaluator`. - */ -struct alignas(8) device_data_reference { - device_data_reference(device_data_reference_type reference_type, - cudf::data_type data_type, - cudf::size_type data_index, - table_reference table_source); - - device_data_reference(device_data_reference_type reference_type, - cudf::data_type data_type, - cudf::size_type data_index); - - const device_data_reference_type reference_type; // Source of data - const cudf::data_type data_type; // Type of data - const cudf::size_type data_index; // The column index of a table, index of a - // literal, or index of an intermediate - const table_reference table_source; - - inline bool operator==(const device_data_reference& rhs) const - { - return std::tie(data_index, reference_type, table_source) == - std::tie(rhs.data_index, rhs.reference_type, rhs.table_source); - } -}; - -// Forward declaration -class linearizer; - -/** - * @brief A generic node that can be evaluated to return a value. - * - * This class is a part of a "visitor" pattern with the `linearizer` class. - * Nodes inheriting from this class can accept visitors. - */ -struct node { - virtual cudf::size_type accept(detail::linearizer& visitor) const = 0; -}; - -/** - * @brief The linearizer traverses an abstract syntax tree to prepare for execution on the device. - * - * This class is part of a "visitor" pattern with the `node` class. - * - * This class does pre-processing work on the host, validating operators and operand data types. 
It - * traverses downward from a root node in a depth-first fashion, capturing information about - * the nodes and constructing vectors of information that are later used by the device for - * evaluating the abstract syntax tree as a "linear" list of operators whose input dependencies are - * resolved into intermediate data storage in shared memory. - */ -class linearizer { - public: - /** - * @brief Construct a new linearizer object - * - * @param expr The expression to create an evaluable linearizer for. - * @param left The left table used for evaluating the abstract syntax tree. - * @param right The right table used for evaluating the abstract syntax tree. - */ - linearizer(detail::node const& expr, cudf::table_view left, cudf::table_view right) - : _left{left}, _right{right}, _node_count{0}, _intermediate_counter{} - { - expr.accept(*this); - } - - /** - * @brief Construct a new linearizer object - * - * @param expr The expression to create an evaluable linearizer for. - * @param table The table used for evaluating the abstract syntax tree. - */ - linearizer(detail::node const& expr, cudf::table_view table) - : _left{table}, _right{table}, _node_count{0}, _intermediate_counter{} - { - expr.accept(*this); - } - - /** - * @brief Get the root data type of the abstract syntax tree. - * - * @return cudf::data_type - */ - cudf::data_type root_data_type() const; - - /** - * @brief Get the maximum number of intermediates stored by the abstract syntax tree. - * - * @return cudf::size_type - */ - cudf::size_type intermediate_count() const { return _intermediate_counter.get_max_used(); } - - /** - * @brief Get the device data references. - * - * @return std::vector - */ - std::vector const& data_references() const - { - return _data_references; - } - - /** - * @brief Get the operators. - * - * @return std::vector - */ - std::vector const& operators() const { return _operators; } - - /** - * @brief Get the operator source indices. - * - * @return std::vector - */ - std::vector const& operator_source_indices() const - { - return _operator_source_indices; - } - - /** - * @brief Get the literal device views. - * - * @return std::vector - */ - std::vector const& literals() const - { - return _literals; - } - - /** - * @brief Visit a literal node. - * - * @param expr Literal node. - * @return cudf::size_type Index of device data reference for the node. - */ - cudf::size_type visit(literal const& expr); - - /** - * @brief Visit a column reference node. - * - * @param expr Column reference node. - * @return cudf::size_type Index of device data reference for the node. - */ - cudf::size_type visit(column_reference const& expr); - - /** - * @brief Visit an expression node. - * - * @param expr Expression node. - * @return cudf::size_type Index of device data reference for the node. - */ - cudf::size_type visit(expression const& expr); - - /** - * @brief Internal class used to track the utilization of intermediate storage locations. - * - * As nodes are being evaluated, they may generate "intermediate" data that is immediately - * consumed. Rather than manifesting this data in global memory, we can store intermediates of any - * fixed width type (up to 8 bytes) by placing them in shared memory. This class helps to track - * the number and indices of intermediate data in shared memory using a give-take model. Locations - * in shared memory can be "taken" and used for storage, "given back," and then later re-used. 
- * This aims to minimize the maximum amount of shared memory needed at any point during the - * evaluation. - * - */ - class intermediate_counter { - public: - intermediate_counter() : used_values(), max_used(0) {} - cudf::size_type take(); - void give(cudf::size_type value); - cudf::size_type get_max_used() const { return max_used; } - - private: - cudf::size_type find_first_missing() const; - std::vector used_values; - cudf::size_type max_used; - }; - - private: - std::vector visit_operands( - std::vector> operands); - cudf::size_type add_data_reference(detail::device_data_reference data_ref); - - // State information about the "linearized" GPU execution plan - cudf::table_view const& _left; - cudf::table_view const& _right; - cudf::size_type _node_count; - intermediate_counter _intermediate_counter; - std::vector _data_references; - std::vector _operators; - std::vector _operator_source_indices; - std::vector _literals; -}; - -} // namespace detail - -} // namespace ast - -} // namespace cudf diff --git a/cpp/include/cudf/ast/detail/operators.hpp b/cpp/include/cudf/ast/detail/operators.hpp index 01ec5b74b77..fd3a0775401 100644 --- a/cpp/include/cudf/ast/detail/operators.hpp +++ b/cpp/include/cudf/ast/detail/operators.hpp @@ -787,14 +787,6 @@ struct single_dispatch_binary_operator_types { } }; -struct single_dispatch_binary_operator { - template - CUDA_DEVICE_CALLABLE auto operator()(F&& f, Ts&&... args) - { - f.template operator()(std::forward(args)...); - } -}; - /** * @brief Functor performing a type dispatch for a binary operator. * diff --git a/cpp/include/cudf/ast/detail/transform.cuh b/cpp/include/cudf/ast/detail/transform.cuh index 89fa7d31980..8a99f1c93a8 100644 --- a/cpp/include/cudf/ast/detail/transform.cuh +++ b/cpp/include/cudf/ast/detail/transform.cuh @@ -15,7 +15,7 @@ */ #pragma once -#include +#include #include #include #include @@ -31,36 +31,12 @@ #include -#include - namespace cudf { namespace ast { namespace detail { -// Type trait for wrapping nullable types in a thrust::optional. Non-nullable -// types are returned as is. -template -struct possibly_null_value; - -template -struct possibly_null_value { - using type = thrust::optional; -}; - -template -struct possibly_null_value { - using type = T; -}; - -template -using possibly_null_value_t = typename possibly_null_value::type; - -// Type used for intermediate storage in expression evaluation. -template -using IntermediateDataType = possibly_null_value_t; - /** * @brief A container for capturing the output of an evaluated expression. * @@ -214,147 +190,30 @@ struct mutable_column_expression_result }; /** - * @brief A container of all device data required to evaluate an expression on tables. - * - * This struct should never be instantiated directly. It is created by the - * `ast_plan` on construction, and the resulting member is publicly accessible - * for passing to kernels for constructing an `expression_evaluator`. - * - */ -struct device_ast_plan { - device_span data_references; - device_span literals; - device_span operators; - device_span operator_source_indices; - cudf::size_type num_intermediates; - int shmem_per_thread; -}; - -/** - * @brief Preprocessor for an expression acting on tables to generate data suitable for AST - * expression evaluation on the GPU. - * - * On construction, an AST plan creates a single "packed" host buffer of all - * data arrays that will be necessary to evaluate an expression on a pair of - * tables. 
This data is copied to a single contiguous device buffer, and - * pointers are generated to the individual components. Because the plan tends - * to be small, this is the most efficient approach for low latency. All the - * data required on the GPU can be accessed via the convenient `dev_plan` - * member struct, which can be used to construct an `expression_evaluator` on - * the device. + * @brief Dispatch to a binary operator based on a single data type. * - * Note that the resulting device data cannot be used once this class goes out of scope. + * This functor is a dispatcher for binary operations that assumes that both + * operands to a binary operation are of the same type. This assumption is + * encoded in the one non-deducible template parameter LHS, the type of the + * left-hand operand, which is then used as the template parameter for both the + * left and right operands to the binary operator f. */ -struct ast_plan { +struct single_dispatch_binary_operator { /** - * @brief Construct an AST plan for an expression operating on two tables. + * @brief Single-type dispatch to a binary operation. * - * @param expr The expression for which to construct a plan. - * @param left The left table on which the expression acts. - * @param right The right table on which the expression acts. - * @param has_nulls Boolean indicator of whether or not the data contains nulls. - * @param stream Stream view on which to allocate resources and queue execution. - * @param mr Device memory resource used to allocate the returned column's device. - */ - ast_plan(detail::node const& expr, - cudf::table_view left, - cudf::table_view right, - bool has_nulls, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) - : _linearizer(expr, left, right) - { - std::vector sizes; - std::vector data_pointers; - - extract_size_and_pointer(_linearizer.data_references(), sizes, data_pointers); - extract_size_and_pointer(_linearizer.literals(), sizes, data_pointers); - extract_size_and_pointer(_linearizer.operators(), sizes, data_pointers); - extract_size_and_pointer(_linearizer.operator_source_indices(), sizes, data_pointers); - - // Create device buffer - auto const buffer_size = std::accumulate(sizes.cbegin(), sizes.cend(), 0); - auto buffer_offsets = std::vector(sizes.size()); - thrust::exclusive_scan(sizes.cbegin(), sizes.cend(), buffer_offsets.begin(), 0); - - auto h_data_buffer = std::make_unique(buffer_size); - for (unsigned int i = 0; i < data_pointers.size(); ++i) { - std::memcpy(h_data_buffer.get() + buffer_offsets[i], data_pointers[i], sizes[i]); - } - - _device_data_buffer = rmm::device_buffer(h_data_buffer.get(), buffer_size, stream, mr); - - stream.synchronize(); - - // Create device pointers to components of plan - auto device_data_buffer_ptr = static_cast(_device_data_buffer.data()); - dev_plan.data_references = device_span( - reinterpret_cast(device_data_buffer_ptr + - buffer_offsets[0]), - _linearizer.data_references().size()); - dev_plan.literals = device_span( - reinterpret_cast( - device_data_buffer_ptr + buffer_offsets[1]), - _linearizer.literals().size()); - dev_plan.operators = device_span( - reinterpret_cast(device_data_buffer_ptr + buffer_offsets[2]), - _linearizer.operators().size()); - dev_plan.operator_source_indices = device_span( - reinterpret_cast(device_data_buffer_ptr + buffer_offsets[3]), - _linearizer.operator_source_indices().size()); - dev_plan.num_intermediates = _linearizer.intermediate_count(); - dev_plan.shmem_per_thread = static_cast( - (has_nulls ?
sizeof(IntermediateDataType) : sizeof(IntermediateDataType)) * - dev_plan.num_intermediates); - } - - /** - * @brief Construct an AST plan for an expression operating on one table. + * @tparam LHS Left input type. + * @tparam F Type of forwarded binary operator functor. + * @tparam Ts Parameter pack of forwarded arguments. * - * @param expr The expression for which to construct a plan. - * @param table The table on which the expression acts. - * @param has_nulls Boolean indicator of whether or not the data contains nulls. - * @param stream Stream view on which to allocate resources and queue execution. - * @param mr Device memory resource used to allocate the returned column's device. + * @param f Binary operator functor. + * @param args Forwarded arguments to `operator()` of `f`. */ - ast_plan(detail::node const& expr, - cudf::table_view table, - bool has_nulls, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) - : ast_plan(expr, table, table, has_nulls, stream, mr) + template + CUDA_DEVICE_CALLABLE auto operator()(F&& f, Ts&&... args) { + f.template operator()(std::forward(args)...); } - - cudf::data_type output_type() const { return _linearizer.root_data_type(); } - - device_ast_plan - dev_plan; ///< The collection of data required to evaluate the expression on the device. - - private: - /** - * @brief Helper function for adding components (operators, literals, etc) to AST plan - * - * @tparam T The underlying type of the input `std::vector` - * @param[in] v The `std::vector` containing components (operators, literals, etc). - * @param[in,out] sizes The `std::vector` containing the size of each data buffer. - * @param[in,out] data_pointers The `std::vector` containing pointers to each data buffer. - */ - template - void extract_size_and_pointer(std::vector const& v, - std::vector& sizes, - std::vector& data_pointers) - { - auto const data_size = sizeof(T) * v.size(); - sizes.push_back(data_size); - data_pointers.push_back(v.data()); - } - - rmm::device_buffer - _device_data_buffer; ///< The device-side data buffer containing the plan information, which is - ///< owned by this class and persists until it is destroyed. - linearizer const _linearizer; ///< The linearizer created from the provided expression that is - ///< used to construct device-side operators and references. }; /** @@ -379,7 +238,7 @@ struct expression_evaluator { */ __device__ expression_evaluator(table_device_view const& left, table_device_view const& right, - device_ast_plan const& plan, + expression_device_view const& plan, IntermediateDataType* thread_intermediate_storage, null_equality compare_nulls = null_equality::EQUAL) : left(left), @@ -400,7 +259,7 @@ struct expression_evaluator { * @param compare_nulls Whether the equality operator returns true or false for two nulls. 
*/ __device__ expression_evaluator(table_device_view const& table, - device_ast_plan const& plan, + expression_device_view const& plan, IntermediateDataType* thread_intermediate_storage, null_equality compare_nulls = null_equality::EQUAL) : left(table), @@ -484,11 +343,11 @@ struct expression_evaluator { */ template __device__ void operator()(OutputType& output_object, - const cudf::size_type input_row_index, - const detail::device_data_reference input, - const detail::device_data_reference output, - const cudf::size_type output_row_index, - const ast_operator op) const + cudf::size_type const input_row_index, + detail::device_data_reference const input, + detail::device_data_reference const output, + cudf::size_type const output_row_index, + ast_operator const op) const { auto const typed_input = resolve_input(input, input_row_index); ast_operator_dispatcher(op, @@ -517,13 +376,13 @@ struct expression_evaluator { */ template __device__ void operator()(OutputType& output_object, - const cudf::size_type left_row_index, - const cudf::size_type right_row_index, - const detail::device_data_reference lhs, - const detail::device_data_reference rhs, - const detail::device_data_reference output, - const cudf::size_type output_row_index, - const ast_operator op) const + cudf::size_type const left_row_index, + cudf::size_type const right_row_index, + detail::device_data_reference const lhs, + detail::device_data_reference const rhs, + detail::device_data_reference const output, + cudf::size_type const output_row_index, + ast_operator const op) const { auto const typed_lhs = resolve_input(lhs, left_row_index); auto const typed_rhs = resolve_input(rhs, right_row_index); @@ -544,11 +403,11 @@ struct expression_evaluator { __device__ void operator()(OutputType& output_object, cudf::size_type left_row_index, cudf::size_type right_row_index, - const detail::device_data_reference lhs, - const detail::device_data_reference rhs, - const detail::device_data_reference output, + detail::device_data_reference const lhs, + detail::device_data_reference const rhs, + detail::device_data_reference const output, cudf::size_type output_row_index, - const ast_operator op) const + ast_operator const op) const { cudf_assert(false && "Invalid binary dispatch operator for the provided input."); } @@ -670,9 +529,9 @@ struct expression_evaluator { typename OutputType, CUDF_ENABLE_IF(is_rep_layout_compatible())> __device__ void resolve_output(OutputType& output_object, - const detail::device_data_reference device_data_reference, - const cudf::size_type row_index, - const possibly_null_value_t result) const + detail::device_data_reference const device_data_reference, + cudf::size_type const row_index, + possibly_null_value_t const result) const { auto const ref_type = device_data_reference.reference_type; if (ref_type == detail::device_data_reference_type::COLUMN) { @@ -690,9 +549,9 @@ struct expression_evaluator { typename OutputType, CUDF_ENABLE_IF(not is_rep_layout_compatible())> __device__ void resolve_output(OutputType& output_object, - const detail::device_data_reference device_data_reference, - const cudf::size_type row_index, - const possibly_null_value_t result) const + detail::device_data_reference const device_data_reference, + cudf::size_type const row_index, + possibly_null_value_t const result) const { cudf_assert(false && "Invalid type in resolve_output."); } @@ -730,9 +589,9 @@ struct expression_evaluator { typename OutputType, std::enable_if_t, Input>>* = nullptr> __device__ void operator()(OutputType& 
output_object, - const cudf::size_type output_row_index, - const possibly_null_value_t input, - const detail::device_data_reference output) const + cudf::size_type const output_row_index, + possibly_null_value_t const input, + detail::device_data_reference const output) const { using OperatorFunctor = detail::operator_functor; using Out = cuda::std::invoke_result_t; @@ -752,9 +611,9 @@ struct expression_evaluator { typename OutputType, std::enable_if_t, Input>>* = nullptr> __device__ void operator()(OutputType& output_object, - const cudf::size_type output_row_index, - const possibly_null_value_t input, - const detail::device_data_reference output) const + cudf::size_type const output_row_index, + possibly_null_value_t const input, + detail::device_data_reference const output) const { cudf_assert(false && "Invalid unary dispatch operator for the provided input."); } @@ -790,10 +649,10 @@ struct expression_evaluator { std::enable_if_t< detail::is_valid_binary_op, LHS, RHS>>* = nullptr> __device__ void operator()(OutputType& output_object, - const cudf::size_type output_row_index, - const possibly_null_value_t lhs, - const possibly_null_value_t rhs, - const detail::device_data_reference output) const + cudf::size_type const output_row_index, + possibly_null_value_t const lhs, + possibly_null_value_t const rhs, + detail::device_data_reference const output) const { using OperatorFunctor = detail::operator_functor; using Out = cuda::std::invoke_result_t; @@ -832,10 +691,10 @@ struct expression_evaluator { std::enable_if_t< !detail::is_valid_binary_op, LHS, RHS>>* = nullptr> __device__ void operator()(OutputType& output_object, - const cudf::size_type output_row_index, - const possibly_null_value_t lhs, - const possibly_null_value_t rhs, - const detail::device_data_reference output) const + cudf::size_type const output_row_index, + possibly_null_value_t const lhs, + possibly_null_value_t const rhs, + detail::device_data_reference const output) const { cudf_assert(false && "Invalid binary dispatch operator for the provided input."); } @@ -843,7 +702,7 @@ struct expression_evaluator { table_device_view const& left; ///< The left table to operate on. table_device_view const& right; ///< The right table to operate on. - device_ast_plan const& + expression_device_view const& plan; ///< The container of device data representing the expression to evaluate. IntermediateDataType* thread_intermediate_storage; ///< The shared memory store of intermediates produced during diff --git a/cpp/include/cudf/ast/nodes.hpp b/cpp/include/cudf/ast/nodes.hpp index 70dda58816e..f36d7bcd3c7 100644 --- a/cpp/include/cudf/ast/nodes.hpp +++ b/cpp/include/cudf/ast/nodes.hpp @@ -15,7 +15,6 @@ */ #pragma once -#include #include #include #include @@ -26,6 +25,21 @@ namespace cudf { namespace ast { +namespace detail { + +// Forward declaration +class expression_parser; +/** + * @brief A generic node that can be evaluated to return a value. + * + * This class is a part of a "visitor" pattern with the `expression_parser` class. + * Nodes inheriting from this class can accept visitors. + */ +struct node { + virtual cudf::size_type accept(expression_parser& visitor) const = 0; +}; + +} // namespace detail /** * @brief Enum of table references. @@ -96,10 +110,10 @@ class literal : public detail::node { * @param visitor Visitor. * @return cudf::size_type Index of device data reference for this instance.
*/ - cudf::size_type accept(detail::linearizer& visitor) const override; + cudf::size_type accept(detail::expression_parser& visitor) const override; private: - const cudf::detail::fixed_width_scalar_device_view_base value; + cudf::detail::fixed_width_scalar_device_view_base const value; }; /** @@ -140,7 +154,7 @@ class column_reference : public detail::node { * @param table Table used to determine types. * @return cudf::data_type */ - cudf::data_type get_data_type(const table_view& table) const + cudf::data_type get_data_type(table_view const& table) const { return table.column(get_column_index()).type(); } @@ -152,9 +166,9 @@ class column_reference : public detail::node { * @param right_table Right table used to determine types. * @return cudf::data_type */ - cudf::data_type get_data_type(const table_view& left_table, const table_view& right_table) const + cudf::data_type get_data_type(table_view const& left_table, table_view const& right_table) const { - const auto table = [&] { + auto const table = [&] { if (get_table_source() == table_reference::LEFT) { return left_table; } else if (get_table_source() == table_reference::RIGHT) { @@ -172,7 +186,7 @@ class column_reference : public detail::node { * @param visitor Visitor. * @return cudf::size_type Index of device data reference for this instance. */ - cudf::size_type accept(detail::linearizer& visitor) const override; + cudf::size_type accept(detail::expression_parser& visitor) const override; private: cudf::size_type column_index; @@ -230,7 +244,7 @@ class expression : public detail::node { * * @return std::vector> */ - std::vector> get_operands() const { return operands; } + std::vector> get_operands() const { return operands; } /** * @brief Accepts a visitor class. @@ -238,11 +252,11 @@ class expression : public detail::node { * @param visitor Visitor. * @return cudf::size_type Index of device data reference for this instance. */ - cudf::size_type accept(detail::linearizer& visitor) const override; + cudf::size_type accept(detail::expression_parser& visitor) const override; private: - const ast_operator op; - const std::vector> operands; + ast_operator const op; + std::vector> const operands; }; } // namespace ast diff --git a/cpp/src/ast/linearizer.cpp b/cpp/src/ast/expression_parser.cpp similarity index 81% rename from cpp/src/ast/linearizer.cpp rename to cpp/src/ast/expression_parser.cpp index 3e442305552..66d72fbb454 100644 --- a/cpp/src/ast/linearizer.cpp +++ b/cpp/src/ast/expression_parser.cpp @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include +#include #include #include #include @@ -56,7 +56,7 @@ device_data_reference::device_data_reference(device_data_reference_type referenc { } -cudf::size_type linearizer::intermediate_counter::take() +cudf::size_type expression_parser::intermediate_counter::take() { auto const first_missing = find_first_missing(); used_values.insert(used_values.cbegin() + first_missing, first_missing); @@ -64,7 +64,7 @@ cudf::size_type linearizer::intermediate_counter::take() return first_missing; } -void linearizer::intermediate_counter::give(cudf::size_type value) +void expression_parser::intermediate_counter::give(cudf::size_type value) { // TODO: add comment auto const lower_bound = std::lower_bound(used_values.cbegin(), used_values.cend(), value); @@ -72,18 +72,7 @@ void linearizer::intermediate_counter::give(cudf::size_type value) used_values.erase(lower_bound); } -/** - * @brief Find the first missing value in a contiguous sequence of integers. - * - * From a sorted container of integers, find the first "missing" value. - * For example, {0, 1, 2, 4, 5} is missing 3, and {1, 2, 3} is missing 0. - * If there are no missing values, return the size of the container. - * - * @param start Starting index. - * @param end Ending index. - * @return cudf::size_type Smallest value not already in the container. - */ -cudf::size_type linearizer::intermediate_counter::find_first_missing() const +cudf::size_type expression_parser::intermediate_counter::find_first_missing() const { if (used_values.empty() || (used_values.front() != 0)) { return 0; } // Search for the first non-contiguous pair of elements. @@ -94,7 +83,7 @@ cudf::size_type linearizer::intermediate_counter::find_first_missing() const : used_values.size(); // No missing elements. Return the next element in the sequence. 
} -cudf::size_type linearizer::visit(literal const& expr) +cudf::size_type expression_parser::visit(literal const& expr) { _node_count++; // Increment the node index auto const data_type = expr.get_data_type(); // Resolve node type @@ -106,7 +95,7 @@ cudf::size_type linearizer::visit(literal const& expr) return add_data_reference(source); } -cudf::size_type linearizer::visit(column_reference const& expr) +cudf::size_type expression_parser::visit(column_reference const& expr) { // Increment the node index _node_count++; @@ -122,14 +111,14 @@ cudf::size_type linearizer::visit(column_reference const& expr) return add_data_reference(source); } -cudf::size_type linearizer::visit(expression const& expr) +cudf::size_type expression_parser::visit(expression const& expr) { // Increment the node index auto const node_index = _node_count++; // Visit children (operands) of this node auto const operand_data_ref_indices = visit_operands(expr.get_operands()); // Resolve operand types - auto data_ref = [this](auto const& index) { return data_references()[index].data_type; }; + auto data_ref = [this](auto const& index) { return _data_references[index].data_type; }; auto begin = thrust::make_transform_iterator(operand_data_ref_indices.cbegin(), data_ref); auto end = begin + operand_data_ref_indices.size(); auto const operand_types = std::vector(begin, end); @@ -145,7 +134,7 @@ cudf::size_type linearizer::visit(expression const& expr) operand_data_ref_indices.cbegin(), operand_data_ref_indices.cend(), [this](auto const& data_reference_index) { - auto const operand_source = data_references()[data_reference_index]; + auto const operand_source = _data_references[data_reference_index]; if (operand_source.reference_type == detail::device_data_reference_type::INTERMEDIATE) { auto const intermediate_index = operand_source.data_index; _intermediate_counter.give(intermediate_index); @@ -167,7 +156,8 @@ cudf::size_type linearizer::visit(expression const& expr) if (!cudf::is_fixed_width(data_type)) { CUDF_FAIL( "The output data type is not a fixed-width type but must be stored in an intermediate."); - } else if (cudf::size_of(data_type) > sizeof(std::int64_t)) { + } else if (cudf::size_of(data_type) > (_has_nulls ? sizeof(IntermediateDataType) + : sizeof(IntermediateDataType))) { CUDF_FAIL("The output data type is too large to be stored in an intermediate."); } return detail::device_data_reference( @@ -183,14 +173,14 @@ cudf::size_type linearizer::visit(expression const& expr) return index; } -cudf::data_type linearizer::root_data_type() const +cudf::data_type expression_parser::output_type() const { - return data_references().empty() ? cudf::data_type(cudf::type_id::EMPTY) - : data_references().back().data_type; + return _data_references.empty() ? cudf::data_type(cudf::type_id::EMPTY) + : _data_references.back().data_type; } -std::vector linearizer::visit_operands( - std::vector> operands) +std::vector expression_parser::visit_operands( + std::vector> operands) { auto operand_data_reference_indices = std::vector(); for (auto const& operand : operands) { @@ -200,7 +190,7 @@ std::vector linearizer::visit_operands( return operand_data_reference_indices; } -cudf::size_type linearizer::add_data_reference(detail::device_data_reference data_ref) +cudf::size_type expression_parser::add_data_reference(detail::device_data_reference data_ref) { // If an equivalent data reference already exists, return its index. Otherwise add this data // reference and return the new index. 
@@ -215,12 +205,15 @@ cudf::size_type linearizer::add_data_reference(detail::device_data_reference dat } // namespace detail -cudf::size_type literal::accept(detail::linearizer& visitor) const { return visitor.visit(*this); } -cudf::size_type column_reference::accept(detail::linearizer& visitor) const +cudf::size_type literal::accept(detail::expression_parser& visitor) const +{ + return visitor.visit(*this); +} +cudf::size_type column_reference::accept(detail::expression_parser& visitor) const { return visitor.visit(*this); } -cudf::size_type expression::accept(detail::linearizer& visitor) const +cudf::size_type expression::accept(detail::expression_parser& visitor) const { return visitor.visit(*this); } diff --git a/cpp/src/ast/transform.cu b/cpp/src/ast/transform.cu index d6426f92002..4656d4b48c0 100644 --- a/cpp/src/ast/transform.cu +++ b/cpp/src/ast/transform.cu @@ -14,6 +14,7 @@ * limitations under the License. */ +#include #include #include #include @@ -47,13 +48,14 @@ namespace detail { * @tparam has_nulls whether or not the output column may contain nulls. * * @param table The table device view used for evaluation. - * @param plan Container of device data required to evaluate the desired expression. + * @param device_expression_data Container of device data required to evaluate the desired + * expression. * @param output_column The destination for the results of evaluating the expression. */ template __launch_bounds__(max_block_size) __global__ void compute_column_kernel(table_device_view const table, - device_ast_plan plan, + ast::detail::expression_device_view device_expression_data, mutable_column_device_view output_column) { // The (required) extern storage of the shared memory array leads to @@ -64,11 +66,12 @@ __launch_bounds__(max_block_size) __global__ IntermediateDataType* intermediate_storage = reinterpret_cast*>(raw_intermediate_storage); - auto thread_intermediate_storage = &intermediate_storage[threadIdx.x * plan.num_intermediates]; + auto thread_intermediate_storage = + &intermediate_storage[threadIdx.x * device_expression_data.num_intermediates]; auto const start_idx = static_cast(threadIdx.x + blockIdx.x * blockDim.x); auto const stride = static_cast(blockDim.x * gridDim.x); - auto evaluator = - cudf::ast::detail::expression_evaluator(table, plan, thread_intermediate_storage); + auto evaluator = cudf::ast::detail::expression_evaluator( + table, device_expression_data, thread_intermediate_storage); for (cudf::size_type row_index = start_idx; row_index < table.num_rows(); row_index += stride) { auto output_dest = mutable_column_expression_result(output_column); @@ -89,19 +92,19 @@ std::unique_ptr compute_column(table_view const table, auto const nullable = cudf::nullable(table); auto const has_nulls = nullable && cudf::has_nulls(table); - auto const plan = ast_plan{expr, table, has_nulls, stream, mr}; + auto const parser = ast::detail::expression_parser{expr, table, has_nulls, stream, mr}; auto const output_column_mask_state = nullable ? (has_nulls ? 
mask_state::UNINITIALIZED : mask_state::ALL_VALID) : mask_state::UNALLOCATED; auto output_column = cudf::make_fixed_width_column( - plan.output_type(), table.num_rows(), output_column_mask_state, stream, mr); + parser.output_type(), table.num_rows(), output_column_mask_state, stream, mr); auto mutable_output_device = cudf::mutable_column_device_view::create(output_column->mutable_view(), stream); // Configure kernel parameters - auto const& dev_plan = plan.dev_plan; + auto const& device_expression_data = parser.device_expression_data; int device_id; CUDA_TRY(cudaGetDevice(&device_id)); int shmem_limit_per_block; @@ -109,22 +112,23 @@ std::unique_ptr compute_column(table_view const table, cudaDeviceGetAttribute(&shmem_limit_per_block, cudaDevAttrMaxSharedMemoryPerBlock, device_id)); auto constexpr MAX_BLOCK_SIZE = 128; auto const block_size = - dev_plan.shmem_per_thread != 0 - ? std::min(MAX_BLOCK_SIZE, shmem_limit_per_block / dev_plan.shmem_per_thread) + device_expression_data.shmem_per_thread != 0 + ? std::min(MAX_BLOCK_SIZE, shmem_limit_per_block / device_expression_data.shmem_per_thread) : MAX_BLOCK_SIZE; - auto const config = cudf::detail::grid_1d{table.num_rows(), block_size}; - auto const shmem_per_block = dev_plan.shmem_per_thread * config.num_threads_per_block; + auto const config = cudf::detail::grid_1d{table.num_rows(), block_size}; + auto const shmem_per_block = + device_expression_data.shmem_per_thread * config.num_threads_per_block; // Execute the kernel auto table_device = table_device_view::create(table, stream); if (has_nulls) { cudf::ast::detail::compute_column_kernel <<>>( - *table_device, dev_plan, *mutable_output_device); + *table_device, device_expression_data, *mutable_output_device); } else { cudf::ast::detail::compute_column_kernel <<>>( - *table_device, dev_plan, *mutable_output_device); + *table_device, device_expression_data, *mutable_output_device); } CHECK_CUDA(stream.value()); return output_column; diff --git a/cpp/src/join/conditional_join.cu b/cpp/src/join/conditional_join.cu index c7a1630311b..1538780db5e 100644 --- a/cpp/src/join/conditional_join.cu +++ b/cpp/src/join/conditional_join.cu @@ -37,7 +37,7 @@ conditional_join(table_view left, { CUDF_FUNC_RANGE(); return get_conditional_join_indices( - left, right, JoinKind, binary_predicate, compare_nulls, stream, mr); + left, right, binary_predicate, compare_nulls, JoinKind, stream, mr); } } // namespace detail diff --git a/cpp/src/join/conditional_join.cuh b/cpp/src/join/conditional_join.cuh index 4602b7fefaa..6794fc89e9e 100644 --- a/cpp/src/join/conditional_join.cuh +++ b/cpp/src/join/conditional_join.cuh @@ -50,9 +50,9 @@ std::pair>, std::unique_ptr>> get_conditional_join_indices(table_view const& left, table_view const& right, - join_kind JoinKind, - ast::expression binary_pred, + ast::expression binary_predicate, null_equality compare_nulls, + join_kind JoinKind, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -83,8 +83,9 @@ get_conditional_join_indices(table_view const& left, auto const nullable = cudf::nullable(left) || cudf::nullable(right); auto const has_nulls = nullable && (cudf::has_nulls(left) || cudf::has_nulls(right)); - auto const plan = ast::detail::ast_plan{binary_pred, left, right, has_nulls, stream, mr}; - CUDF_EXPECTS(plan.output_type().id() == type_id::BOOL8, + auto const parser = + ast::detail::expression_parser{binary_predicate, left, right, has_nulls, stream, mr}; + CUDF_EXPECTS(parser.output_type().id() == type_id::BOOL8, "The expression must produce a 
boolean output."); auto left_table = table_device_view::create(left, stream); @@ -95,7 +96,8 @@ get_conditional_join_indices(table_view const& left, CHECK_CUDA(stream.value()); constexpr int block_size{DEFAULT_JOIN_BLOCK_SIZE}; detail::grid_1d config(left_table->num_rows(), block_size); - auto const shmem_size_per_block = plan.dev_plan.shmem_per_thread * config.num_threads_per_block; + auto const shmem_size_per_block = + parser.device_expression_data.shmem_per_thread * config.num_threads_per_block; // Determine number of output rows without actually building the output to simply // find what the size of the output will be. @@ -103,11 +105,21 @@ get_conditional_join_indices(table_view const& left, if (has_nulls) { compute_conditional_join_output_size <<>>( - *left_table, *right_table, KernelJoinKind, compare_nulls, plan.dev_plan, size.data()); + *left_table, + *right_table, + KernelJoinKind, + compare_nulls, + parser.device_expression_data, + size.data()); } else { compute_conditional_join_output_size <<>>( - *left_table, *right_table, KernelJoinKind, compare_nulls, plan.dev_plan, size.data()); + *left_table, + *right_table, + KernelJoinKind, + compare_nulls, + parser.device_expression_data, + size.data()); } CHECK_CUDA(stream.value()); @@ -124,8 +136,8 @@ get_conditional_join_indices(table_view const& left, auto left_indices = std::make_unique>(join_size, stream, mr); auto right_indices = std::make_unique>(join_size, stream, mr); - const auto& join_output_l = left_indices->data(); - const auto& join_output_r = right_indices->data(); + auto const& join_output_l = left_indices->data(); + auto const& join_output_r = right_indices->data(); if (has_nulls) { conditional_join <<>>( @@ -136,7 +148,7 @@ get_conditional_join_indices(table_view const& left, join_output_l, join_output_r, write_index.data(), - plan.dev_plan, + parser.device_expression_data, join_size); } else { conditional_join @@ -148,7 +160,7 @@ get_conditional_join_indices(table_view const& left, join_output_l, join_output_r, write_index.data(), - plan.dev_plan, + parser.device_expression_data, join_size); } diff --git a/cpp/src/join/conditional_join_kernels.cuh b/cpp/src/join/conditional_join_kernels.cuh index 3d34a49c5af..c6edf049d5d 100644 --- a/cpp/src/join/conditional_join_kernels.cuh +++ b/cpp/src/join/conditional_join_kernels.cuh @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include #include @@ -42,16 +42,18 @@ namespace detail { * @param[in] right_table The right table * @param[in] JoinKind The type of join to be performed * @param[in] compare_nulls Controls whether null join-key values should match or not. - * @param[in] plan Container of device data required to evaluate the desired expression. + * @param[in] device_expression_data Container of device data required to evaluate the desired + * expression. * @param[out] output_size The resulting output size */ template -__global__ void compute_conditional_join_output_size(table_device_view left_table, - table_device_view right_table, - join_kind JoinKind, - null_equality compare_nulls, - ast::detail::device_ast_plan plan, - cudf::size_type* output_size) +__global__ void compute_conditional_join_output_size( + table_device_view left_table, + table_device_view right_table, + join_kind JoinKind, + null_equality compare_nulls, + ast::detail::expression_device_view device_expression_data, + cudf::size_type* output_size) { // The (required) extern storage of the shared memory array leads to // conflicting declarations between different templates. 
The easiest @@ -60,16 +62,17 @@ __global__ void compute_conditional_join_output_size(table_device_view left_tabl extern __shared__ char raw_intermediate_storage[]; cudf::ast::detail::IntermediateDataType* intermediate_storage = reinterpret_cast*>(raw_intermediate_storage); - auto thread_intermediate_storage = &intermediate_storage[threadIdx.x * plan.num_intermediates]; + auto thread_intermediate_storage = + &intermediate_storage[threadIdx.x * device_expression_data.num_intermediates]; cudf::size_type thread_counter(0); - const cudf::size_type left_start_idx = threadIdx.x + blockIdx.x * blockDim.x; - const cudf::size_type left_stride = blockDim.x * gridDim.x; - const cudf::size_type left_num_rows = left_table.num_rows(); - const cudf::size_type right_num_rows = right_table.num_rows(); + cudf::size_type const left_start_idx = threadIdx.x + blockIdx.x * blockDim.x; + cudf::size_type const left_stride = blockDim.x * gridDim.x; + cudf::size_type const left_num_rows = left_table.num_rows(); + cudf::size_type const right_num_rows = right_table.num_rows(); auto evaluator = cudf::ast::detail::expression_evaluator( - left_table, right_table, plan, thread_intermediate_storage, compare_nulls); + left_table, right_table, device_expression_data, thread_intermediate_storage, compare_nulls); for (cudf::size_type left_row_index = left_start_idx; left_row_index < left_num_rows; left_row_index += left_stride) { @@ -118,7 +121,8 @@ __global__ void compute_conditional_join_output_size(table_device_view left_tabl * @param[out] join_output_r The right result of the join operation * @param[in,out] current_idx A global counter used by threads to coordinate * writes to the global output - * @param plan Container of device data required to evaluate the desired expression. + * @param device_expression_data Container of device data required to evaluate the desired + * expression. 
* @param[in] max_size The maximum size of the output */ template @@ -129,8 +133,8 @@ __global__ void conditional_join(table_device_view left_table, cudf::size_type* join_output_l, cudf::size_type* join_output_r, cudf::size_type* current_idx, - cudf::ast::detail::device_ast_plan plan, - const cudf::size_type max_size) + cudf::ast::detail::expression_device_view device_expression_data, + cudf::size_type const max_size) { constexpr int num_warps = block_size / detail::warp_size; __shared__ cudf::size_type current_idx_shared[num_warps]; @@ -144,12 +148,13 @@ __global__ void conditional_join(table_device_view left_table, extern __shared__ char raw_intermediate_storage[]; cudf::ast::detail::IntermediateDataType* intermediate_storage = reinterpret_cast*>(raw_intermediate_storage); - auto thread_intermediate_storage = &intermediate_storage[threadIdx.x * plan.num_intermediates]; + auto thread_intermediate_storage = + &intermediate_storage[threadIdx.x * device_expression_data.num_intermediates]; - const int warp_id = threadIdx.x / detail::warp_size; - const int lane_id = threadIdx.x % detail::warp_size; - const cudf::size_type left_num_rows = left_table.num_rows(); - const cudf::size_type right_num_rows = right_table.num_rows(); + int const warp_id = threadIdx.x / detail::warp_size; + int const lane_id = threadIdx.x % detail::warp_size; + cudf::size_type const left_num_rows = left_table.num_rows(); + cudf::size_type const right_num_rows = right_table.num_rows(); if (0 == lane_id) { current_idx_shared[warp_id] = 0; } @@ -157,10 +162,10 @@ __global__ void conditional_join(table_device_view left_table, cudf::size_type left_row_index = threadIdx.x + blockIdx.x * blockDim.x; - const unsigned int activemask = __ballot_sync(0xffffffff, left_row_index < left_num_rows); + unsigned int const activemask = __ballot_sync(0xffffffff, left_row_index < left_num_rows); auto evaluator = cudf::ast::detail::expression_evaluator( - left_table, right_table, plan, thread_intermediate_storage, compare_nulls); + left_table, right_table, device_expression_data, thread_intermediate_storage, compare_nulls); if (left_row_index < left_num_rows) { bool found_match = false;
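The sketches below are illustrative notes on the machinery this diff renames and relocates; they are standalone host-side examples, not part of the patch. First, the `possibly_null_value` trait that moves into `expression_parser.hpp`: it wraps a type in an optional only when the expression may produce nulls, so non-nullable evaluation pays no storage overhead for validity. A minimal sketch, using `std::optional` in place of `thrust::optional` so it compiles without CUDA:

```cpp
#include <cstdint>
#include <optional>
#include <type_traits>

// Standalone illustration of the possibly_null_value trait; the cudf version
// uses thrust::optional and is what IntermediateDataType<has_nulls> builds on.
template <typename T, bool has_nulls>
struct possibly_null_value;

template <typename T>
struct possibly_null_value<T, true> {
  using type = std::optional<T>;  // nullable path: value plus validity in one object
};

template <typename T>
struct possibly_null_value<T, false> {
  using type = T;  // non-nullable path: the raw value, no overhead
};

template <typename T, bool has_nulls>
using possibly_null_value_t = typename possibly_null_value<T, has_nulls>::type;

static_assert(std::is_same_v<possibly_null_value_t<std::int64_t, false>, std::int64_t>);
static_assert(std::is_same_v<possibly_null_value_t<std::int64_t, true>, std::optional<std::int64_t>>);

int main() {}
```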
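The `intermediate_counter` documentation describes a give/take model for reusing the shared-memory slots that hold intermediate results. The sketch below is a simplified standalone version of that bookkeeping (the class name, `int` indices, and the `main` driver are illustrative, not the cudf class); it shows how returning a slot lets a later `take()` hand out the same index again, keeping the high-water mark small:

```cpp
#include <algorithm>
#include <cassert>
#include <vector>

// Simplified stand-in for expression_parser::intermediate_counter.
class slot_counter {
 public:
  // Claim the lowest index not currently in use and record it (vector stays sorted).
  int take()
  {
    int const slot = find_first_missing();
    used_.insert(used_.cbegin() + slot, slot);
    max_used_ = std::max(max_used_, slot + 1);  // high-water mark of required slots
    return slot;
  }

  // Return a slot so a later take() can reuse the same index.
  void give(int slot)
  {
    auto const it = std::lower_bound(used_.cbegin(), used_.cend(), slot);
    if (it != used_.cend() && *it == slot) { used_.erase(it); }
  }

  int max_used() const { return max_used_; }

 private:
  // First value missing from the sorted sequence: {0, 1, 2, 4} -> 3, {1, 2} -> 0.
  int find_first_missing() const
  {
    for (int i = 0; i < static_cast<int>(used_.size()); ++i) {
      if (used_[i] != i) { return i; }
    }
    return static_cast<int>(used_.size());
  }

  std::vector<int> used_;
  int max_used_ = 0;
};

int main()
{
  slot_counter counter;
  int const a = counter.take();  // 0
  int const b = counter.take();  // 1
  counter.give(a);               // index 0 is free again
  int const c = counter.take();  // reuses 0 instead of growing to 2
  assert(a == 0 && b == 1 && c == 0 && counter.max_used() == 2);
  return 0;
}
```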
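`expression_parser::move_to_device` packs every host-side component (data references, literals, operators, operator source indices) into one contiguous buffer so that a single allocation and a single host-to-device copy suffice, then carves typed spans back out at exclusive-scan offsets. A host-only sketch of that packing scheme, with placeholder element types standing in for the cudf internals and `std::exclusive_scan` standing in for the thrust call used in the patch:

```cpp
#include <cstdint>
#include <cstring>
#include <numeric>
#include <vector>

int main()
{
  std::vector<std::int32_t> data_references{7, 8, 9};   // stand-in for device_data_reference
  std::vector<std::int64_t> operators{1, 2};            // stand-in for ast_operator
  std::vector<std::int32_t> operator_sources{0, 1, 2};  // stand-in for source indices

  std::vector<std::size_t> sizes{data_references.size() * sizeof(std::int32_t),
                                 operators.size() * sizeof(std::int64_t),
                                 operator_sources.size() * sizeof(std::int32_t)};
  std::vector<void const*> pointers{data_references.data(), operators.data(),
                                    operator_sources.data()};

  // Offsets are an exclusive prefix sum of the sizes: {0, 12, 28} for this data.
  std::vector<std::size_t> offsets(sizes.size());
  std::exclusive_scan(sizes.cbegin(), sizes.cend(), offsets.begin(), std::size_t{0});

  auto const total = std::accumulate(sizes.cbegin(), sizes.cend(), std::size_t{0});
  std::vector<char> packed(total);
  for (std::size_t i = 0; i < pointers.size(); ++i) {
    std::memcpy(packed.data() + offsets[i], pointers[i], sizes[i]);
  }
  // In libcudf this packed buffer becomes a single rmm::device_buffer, and the
  // spans of expression_device_view are carved out at packed.data() + offsets[i].
  return 0;
}
```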
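`single_dispatch_binary_operator`, now documented where it is defined in `transform.cuh`, dispatches a binary operation under the assumption that both operands share one type: the single non-deduced template parameter is forwarded as both template arguments of the wrapped functor. A host-only sketch of that idea (the real functor is `CUDA_DEVICE_CALLABLE` and sits at the end of cudf's type-dispatch chain; the example functor here is made up):

```cpp
#include <cstdint>
#include <iostream>
#include <utility>

// Example binary functor that expects two template type parameters.
struct typed_add {
  template <typename LHS, typename RHS>
  void operator()(LHS lhs, RHS rhs) const
  {
    std::cout << "sum = " << (lhs + rhs) << '\n';
  }
};

struct single_dispatch_binary_operator {
  template <typename LHS, typename F, typename... Ts>
  auto operator()(F&& f, Ts&&... args) const
  {
    // Both sides of the binary operator receive the single dispatched type LHS.
    return f.template operator()<LHS, LHS>(std::forward<Ts>(args)...);
  }
};

int main()
{
  single_dispatch_binary_operator dispatcher;
  dispatcher.operator()<std::int32_t>(typed_add{}, 2, 3);  // prints "sum = 5"
  return 0;
}
```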
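Both `compute_column` and the conditional-join path size their kernel launches from `device_expression_data.shmem_per_thread`, which is `sizeof(IntermediateDataType<has_nulls>) * num_intermediates`. The arithmetic below mirrors the block-size selection in `cpp/src/ast/transform.cu`; the 16-byte figure for a nullable intermediate and the 48 KiB per-block shared-memory limit are assumptions chosen for the example, not values taken from the patch:

```cpp
#include <algorithm>
#include <cstdio>

int main()
{
  bool const has_nulls              = true;
  int const num_intermediates       = 3;                  // reported by the parser
  int const intermediate_size       = has_nulls ? 16 : 8; // bytes per shared-memory slot (assumed)
  int const shmem_per_thread        = intermediate_size * num_intermediates;
  int const shmem_limit_per_block   = 48 * 1024;          // assumed device limit
  int const MAX_BLOCK_SIZE          = 128;

  // Same selection rule as the kernel launch code: shrink the block if the
  // per-thread shared-memory demand would exceed the per-block limit.
  int const block_size = shmem_per_thread != 0
                           ? std::min(MAX_BLOCK_SIZE, shmem_limit_per_block / shmem_per_thread)
                           : MAX_BLOCK_SIZE;
  int const shmem_per_block = shmem_per_thread * block_size;

  std::printf("block_size=%d, dynamic shared memory per block=%d bytes\n",
              block_size, shmem_per_block);  // 128 threads, 6144 bytes for these numbers
  return 0;
}
```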
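None of this changes the user-facing AST API: an expression is still built from `column_reference`, `literal`, and `expression` nodes and handed to `cudf::ast::compute_column`, which now constructs an `expression_parser` internally instead of an `ast_plan`/`linearizer`. A usage sketch, assuming the headers and constructor signatures present on the branch this diff targets:

```cpp
#include <cudf/ast/nodes.hpp>
#include <cudf/ast/operators.hpp>
#include <cudf/ast/transform.hpp>
#include <cudf/column/column.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf/table/table_view.hpp>

#include <cstdint>
#include <memory>

// Computes `table.column(0) + 5` for every row. compute_column parses the
// expression on the host, packs the parser output into one device buffer, and
// launches compute_column_kernel.
std::unique_ptr<cudf::column> add_five_to_first_column(cudf::table_view const& table)
{
  auto col_ref = cudf::ast::column_reference(0);           // column 0 of the (left) table
  auto five    = cudf::numeric_scalar<std::int32_t>(5);
  auto lit     = cudf::ast::literal(five);                 // wraps a scalar device view
  auto expr    = cudf::ast::expression(cudf::ast::ast_operator::ADD, col_ref, lit);
  return cudf::ast::compute_column(table, expr);
}
```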